{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "t5_fine-tuning",
      "provenance": [],
      "collapsed_sections": [
        "HVxGfmEMCKs_",
        "RKNr7fgzcKpZ",
        "vfhlYUUV2NIh",
        "b3C13iabZvwK",
        "qdEgCwL7cIyi",
        "W4cfw8bMcNdA",
        "brPOSAkjNP5t",
        "Dhqigmiw2hVh",
        "0B4IhzEgO21B",
        "cANrUEXhO8QY",
        "DEWi6c-pGZV9",
        "GwdWdHG0RP5J",
        "iq8M8nbTSJlE",
        "vZ-YLmJyg64T",
        "hOxk-ZoJmamm",
        "aVfmE4O3Ku7H",
        "AgNV3TMzqSvj"
      ],
      "machine_shape": "hm",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "7d8f60bfc0a248e58028b6e8a477a5f7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_72dc1e39b931429883e68c0603797896",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_cde60c5e18f04ba792fff8c2ac33f470",
              "IPY_MODEL_c0c0df12695b4a1eacf8fa4ccc0ac62c"
            ]
          }
        },
        "72dc1e39b931429883e68c0603797896": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "cde60c5e18f04ba792fff8c2ac33f470": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_72ea881ce3f445a9983d858b76dd257b",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 791656,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 791656,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_d0f0c28a14b242f8990a547ed7f87c04"
          }
        },
        "c0c0df12695b4a1eacf8fa4ccc0ac62c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_f97741534b554be3b5cdccd45c73b317",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 792k/792k [02:08&lt;00:00, 6.18kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_1e70a3dc7090487fa883e932bff395cb"
          }
        },
        "72ea881ce3f445a9983d858b76dd257b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "d0f0c28a14b242f8990a547ed7f87c04": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f97741534b554be3b5cdccd45c73b317": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "1e70a3dc7090487fa883e932bff395cb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f414bac332054c7f86af89b8e50c7d73": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_1d9c52a1bb8843b6b0f151571cbf30a4",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_ed039b8125714030b03912fb29a93ca4",
              "IPY_MODEL_d9b445b8b3b04569adf22429259b4954"
            ]
          }
        },
        "1d9c52a1bb8843b6b0f151571cbf30a4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "ed039b8125714030b03912fb29a93ca4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_6c61b3c76d7045eb825172ba51b3fa63",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 1199,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1199,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_d11ffd1efc024c1ca86276430d29fd1e"
          }
        },
        "d9b445b8b3b04569adf22429259b4954": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_22fac35d924f464ca0b33be21a566a86",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 1.20k/1.20k [00:20&lt;00:00, 58.3B/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_cfe128b0d2c648c18d2255b3f8506a09"
          }
        },
        "6c61b3c76d7045eb825172ba51b3fa63": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "d11ffd1efc024c1ca86276430d29fd1e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "22fac35d924f464ca0b33be21a566a86": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "cfe128b0d2c648c18d2255b3f8506a09": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "c34ac6d2548249819c1eab28956edec4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_de2c77b3fb0f4dba99f92062b2db5328",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_6ea23f0979824aac935f3f1ad10a86cd",
              "IPY_MODEL_6452bc3b5ad445a8a5e272207fe4504d"
            ]
          }
        },
        "de2c77b3fb0f4dba99f92062b2db5328": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "6ea23f0979824aac935f3f1ad10a86cd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_d6ef508766c54f8993d1d1f3d7cac040",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 891691430,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 891691430,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_1b69bbddeb244defab9e21690a45c79e"
          }
        },
        "6452bc3b5ad445a8a5e272207fe4504d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_4a2b56fd6780470ab1574509fa432183",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 892M/892M [00:17&lt;00:00, 51.3MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_3853231cd966465882a93fad9c5dc428"
          }
        },
        "d6ef508766c54f8993d1d1f3d7cac040": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "1b69bbddeb244defab9e21690a45c79e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "4a2b56fd6780470ab1574509fa432183": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "3853231cd966465882a93fad9c5dc428": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "915a0b65612243668570c555a47a6c37": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_c85b348624504af294b78de744969493",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_d56a6918840e4f6588af5da5f8f54015",
              "IPY_MODEL_41db48cf488a4522b1f04b33c2261262"
            ]
          }
        },
        "c85b348624504af294b78de744969493": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "d56a6918840e4f6588af5da5f8f54015": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_8c2d9ac8c22f486299949f4cbed16437",
            "_dom_classes": [],
            "description": "Validation sanity check: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "info",
            "max": 1,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_222974dba69145e7b171360bec239ba5"
          }
        },
        "41db48cf488a4522b1f04b33c2261262": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_9e95200811bb497ab0ac0229f5e0ddaa",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 5/5 [00:01&lt;00:00,  3.24it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_3773b14f23974ad3a5bbb7ff947e68ca"
          }
        },
        "8c2d9ac8c22f486299949f4cbed16437": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "222974dba69145e7b171360bec239ba5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "9e95200811bb497ab0ac0229f5e0ddaa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "3773b14f23974ad3a5bbb7ff947e68ca": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "3ec26f803d124dd0877e1ce0e3517f68": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_aabb0b2f2ae64684a80f1ea39c9a7d1b",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_885696e0606c4353a5d21feec03aebc7",
              "IPY_MODEL_659dd7302f3a40038834c4f1d8e59250"
            ]
          }
        },
        "aabb0b2f2ae64684a80f1ea39c9a7d1b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "885696e0606c4353a5d21feec03aebc7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_6f3859c80aa945e4b4ae2aa957755b7c",
            "_dom_classes": [],
            "description": "Epoch 2: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 3125,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 3125,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_a840a738d20b4f43baf18453db53fdf0"
          }
        },
        "659dd7302f3a40038834c4f1d8e59250": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_f7139c4e04374ffbafe6a849500c6369",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 3125/3125 [54:28&lt;00:00,  1.05s/it, loss=0.003, v_num=0, val_loss=0.0874]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_ef8f0b7c9b0c4f829e3ad59e83cbdd67"
          }
        },
        "6f3859c80aa945e4b4ae2aa957755b7c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "a840a738d20b4f43baf18453db53fdf0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f7139c4e04374ffbafe6a849500c6369": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "ef8f0b7c9b0c4f829e3ad59e83cbdd67": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "dbe7a4854b8f420faaea8de4583fb1f0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_4d1f674483d44e559ae1de553dd1d726",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_ce506c0137914e4db93b9db35154c62a",
              "IPY_MODEL_e92a181ff64d4e0290236a91cbdb8d67"
            ]
          }
        },
        "4d1f674483d44e559ae1de553dd1d726": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "ce506c0137914e4db93b9db35154c62a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_e8f7179c238e4d2d91d456b2c07e1b3e",
            "_dom_classes": [],
            "description": "Validating: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "info",
            "max": 1,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_e67100d71b5047158ab48ef0fd36cb99"
          }
        },
        "e92a181ff64d4e0290236a91cbdb8d67": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_17f7e321de81404dabaa3e84fadce2cf",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 250/250 [00:52&lt;00:00,  4.79it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_a15e2fcc467242cb9fad5b2082a70c39"
          }
        },
        "e8f7179c238e4d2d91d456b2c07e1b3e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "e67100d71b5047158ab48ef0fd36cb99": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "17f7e321de81404dabaa3e84fadce2cf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "a15e2fcc467242cb9fad5b2082a70c39": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f40c9bf16c9a473ba758a6439dce2652": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_8d17a251bf1440d4aa8513ad5f15ba1d",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_165319529b364183ae344a9a14f5bc52",
              "IPY_MODEL_3d0c08f3abbe421d83f2b35583221291"
            ]
          }
        },
        "8d17a251bf1440d4aa8513ad5f15ba1d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "165319529b364183ae344a9a14f5bc52": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_6e851577f682494c894b9afdd07b1201",
            "_dom_classes": [],
            "description": "Validating: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "info",
            "max": 1,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_e67e9e945a9c430f9844946cd81aae3a"
          }
        },
        "3d0c08f3abbe421d83f2b35583221291": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_34fbc6e29df046faaedd9fe3230559cb",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 250/250 [00:53&lt;00:00,  4.71it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_bbbdd81a2e8f4d68b33d698f45ccc9ae"
          }
        },
        "6e851577f682494c894b9afdd07b1201": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "e67e9e945a9c430f9844946cd81aae3a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "34fbc6e29df046faaedd9fe3230559cb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "bbbdd81a2e8f4d68b33d698f45ccc9ae": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "6aaf51cb9ad44c94b6a174a8768904f7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_51d23e1199274477a69557c74609afb2",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_029f74818c6842d7a28af62032418880",
              "IPY_MODEL_8db144e9144141779a1088c4bc000a99"
            ]
          }
        },
        "51d23e1199274477a69557c74609afb2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "029f74818c6842d7a28af62032418880": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_210517aede4f4cfab9120fdeb3d8361a",
            "_dom_classes": [],
            "description": "100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 782,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 782,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_df9bc2dc2b3c4fee98affdd7f5ca1ef6"
          }
        },
        "8db144e9144141779a1088c4bc000a99": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_b684a47485af4cb1934d57cbb03a4f57",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 782/782 [10:38&lt;00:00,  1.22it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_942d20b134964d1d895af69938918464"
          }
        },
        "210517aede4f4cfab9120fdeb3d8361a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "df9bc2dc2b3c4fee98affdd7f5ca1ef6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "b684a47485af4cb1934d57cbb03a4f57": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "942d20b134964d1d895af69938918464": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "0037bb8409bb4d65ac4ebd956fd1e631": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_db528e3117024014b4d281b650901cbd",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_350fc08aa59849fc9fd3f3e454583a6c",
              "IPY_MODEL_be936dd408314d0d90a22f627ca517ca"
            ]
          }
        },
        "db528e3117024014b4d281b650901cbd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "350fc08aa59849fc9fd3f3e454583a6c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_99f56e1a8fdb4b2282fa6e17819d044e",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 791656,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 791656,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_462bd815ddbc4687bcf7695f59919f0c"
          }
        },
        "be936dd408314d0d90a22f627ca517ca": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_40edb7d92c1145ee9e3bb823e4688e16",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 792k/792k [00:06&lt;00:00, 131kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_f827cd8a6bf846c590913c5ea40e6737"
          }
        },
        "99f56e1a8fdb4b2282fa6e17819d044e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "462bd815ddbc4687bcf7695f59919f0c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "40edb7d92c1145ee9e3bb823e4688e16": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "f827cd8a6bf846c590913c5ea40e6737": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "61d58772a6a64c5c8ad30dab2563a56f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_4000e73e6d804763986dc9a9c74456aa",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_0dd99276ab294c939d83320f4674d5c2",
              "IPY_MODEL_d306f7ff1ec94561aeed9ff59ba9b54b"
            ]
          }
        },
        "4000e73e6d804763986dc9a9c74456aa": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "0dd99276ab294c939d83320f4674d5c2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_0893a9730450433fa76a74b008a6f482",
            "_dom_classes": [],
            "description": "Validation sanity check: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "info",
            "max": 1,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_f8873c7201e1410cb0ec52cb7e34c3c9"
          }
        },
        "d306f7ff1ec94561aeed9ff59ba9b54b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_234eb8b041c44358b2f993c2853162f7",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 5/5 [00:01&lt;00:00,  3.74it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_8f73da698e85474fbecfd91bb7770c56"
          }
        },
        "0893a9730450433fa76a74b008a6f482": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "f8873c7201e1410cb0ec52cb7e34c3c9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "234eb8b041c44358b2f993c2853162f7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "8f73da698e85474fbecfd91bb7770c56": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "26a0cb124049417aa9dbdd010e3af03a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_8a14bd8f2a424b15b48426fd5e320678",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_09ed6242c5ef4a4791a1074ff7e4616e",
              "IPY_MODEL_487a6ea92fe0463ebbcb63094fde5136"
            ]
          }
        },
        "8a14bd8f2a424b15b48426fd5e320678": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "09ed6242c5ef4a4791a1074ff7e4616e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_c050be8414044acdb1a496495d148302",
            "_dom_classes": [],
            "description": "Epoch 2: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 2250,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 2250,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_56a67d534f284df0bc1121f1e264f5e2"
          }
        },
        "487a6ea92fe0463ebbcb63094fde5136": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_f168c4ae2d014e89bacc58e43427302e",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 2250/2250 [20:19&lt;00:00,  1.84it/s, loss=0.005, v_num=1, val_loss=0.0696]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_5cabe7d5ed6b46be882c558d28a29ca2"
          }
        },
        "c050be8414044acdb1a496495d148302": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "56a67d534f284df0bc1121f1e264f5e2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f168c4ae2d014e89bacc58e43427302e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "5cabe7d5ed6b46be882c558d28a29ca2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "1681a9ce7f9340caa50c4204777a6f9e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_a9f0c66f958e493286155c8d2631d255",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_e04d6312d5d4425ab726588c485e668c",
              "IPY_MODEL_fab8ee7d5d3940819eb9131efbbad791"
            ]
          }
        },
        "a9f0c66f958e493286155c8d2631d255": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "e04d6312d5d4425ab726588c485e668c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_6dd2781f88eb4549b4203dfec9c1a98e",
            "_dom_classes": [],
            "description": "Validating: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "info",
            "max": 1,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_893ba880ac6545baa6eb4a532ecc5753"
          }
        },
        "fab8ee7d5d3940819eb9131efbbad791": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_d4fc7ae628c94a758ce694318bc620ba",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 250/250 [00:48&lt;00:00,  5.24it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_4c33ca548b5e4738abdac09575e2a325"
          }
        },
        "6dd2781f88eb4549b4203dfec9c1a98e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "893ba880ac6545baa6eb4a532ecc5753": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "d4fc7ae628c94a758ce694318bc620ba": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "4c33ca548b5e4738abdac09575e2a325": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "ff475d6cdc074c14aa7b2cfede771b07": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_d77faf8b9ea6480abe594114823ca52f",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_ee4f41b591fe41a5a2d915c343b16c1d",
              "IPY_MODEL_d8946214acc44c4cb97688538daaa33f"
            ]
          }
        },
        "d77faf8b9ea6480abe594114823ca52f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "ee4f41b591fe41a5a2d915c343b16c1d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_9b9306452732495cbb1acd3e2fcf3b69",
            "_dom_classes": [],
            "description": "Validating: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "info",
            "max": 1,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_f42e9e596ad0485b842fee92d1884750"
          }
        },
        "d8946214acc44c4cb97688538daaa33f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_1d9f8718ba4d4b60997757ea7f1db72b",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 250/250 [00:48&lt;00:00,  5.22it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_63db466ae63b42a5a79d051ef5af653e"
          }
        },
        "9b9306452732495cbb1acd3e2fcf3b69": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "f42e9e596ad0485b842fee92d1884750": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "1d9f8718ba4d4b60997757ea7f1db72b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "63db466ae63b42a5a79d051ef5af653e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "8933ab7f935e4776970ddfe35f5da135": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_84eb2bf17a9048fc94b6f47867d1b0ba",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_cdd7554792cf4c73922e2f050d1fcaaf",
              "IPY_MODEL_a32aa193a82f478387c14f384c2c689e"
            ]
          }
        },
        "84eb2bf17a9048fc94b6f47867d1b0ba": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "cdd7554792cf4c73922e2f050d1fcaaf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_e4cbd76c110541cbbf1386e299c4d9d6",
            "_dom_classes": [],
            "description": "100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 63,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 63,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_da67548f1abc4727965f72b8cb367681"
          }
        },
        "a32aa193a82f478387c14f384c2c689e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_63b11aa7ee0c4271aedb87ad3e7d23c3",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 63/63 [53:03&lt;00:00, 50.53s/it]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_720b90b3f86c4e5da15447777806e9a7"
          }
        },
        "e4cbd76c110541cbbf1386e299c4d9d6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "da67548f1abc4727965f72b8cb367681": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "63b11aa7ee0c4271aedb87ad3e7d23c3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "720b90b3f86c4e5da15447777806e9a7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "78b1b91a08214461b74fb1e143247d1e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_902a509471004d2691d807c4990fccd2",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_74ec15497e1743a4af6be12e3bc1487d",
              "IPY_MODEL_a70b457d9379403f9fac247de68bb8e3"
            ]
          }
        },
        "902a509471004d2691d807c4990fccd2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "74ec15497e1743a4af6be12e3bc1487d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_28f9d9aa0ece4831b0f9e412d8a88f8d",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 791656,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 791656,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_7640680e1006492da75d873726567fed"
          }
        },
        "a70b457d9379403f9fac247de68bb8e3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_1090e3e017564a2281c60fb53a901c75",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 792k/792k [00:04&lt;00:00, 191kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_9df2679ba627444e9b76bd2ff0ddc657"
          }
        },
        "28f9d9aa0ece4831b0f9e412d8a88f8d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "7640680e1006492da75d873726567fed": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "1090e3e017564a2281c60fb53a901c75": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "9df2679ba627444e9b76bd2ff0ddc657": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "5c7427d7db844b9691d30cf2de1efc17": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_bb0df1833ee3489da5c2a9c7b1306cc6",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_3d2817812b6f475a8c838fd14646469a",
              "IPY_MODEL_9d0f0c946790477fb8bc8bac64dfd7de"
            ]
          }
        },
        "bb0df1833ee3489da5c2a9c7b1306cc6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "3d2817812b6f475a8c838fd14646469a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_8254b8062d5e4280bea46f8bc444c5db",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 1199,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1199,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_ab5f07ab5c574148a0062eb7f1ce5bcd"
          }
        },
        "9d0f0c946790477fb8bc8bac64dfd7de": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_47fdc2009efc443392ecd182996fcca9",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 1.20k/1.20k [00:42&lt;00:00, 28.4B/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_9b705e83fea84cbf912e33d6342be721"
          }
        },
        "8254b8062d5e4280bea46f8bc444c5db": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "ab5f07ab5c574148a0062eb7f1ce5bcd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "47fdc2009efc443392ecd182996fcca9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "9b705e83fea84cbf912e33d6342be721": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "e8e8ea6199df43019930ac7b557c46a5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_0566f29b017f47f399d7579d7929e046",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_932309f0a40b46659c0cac7cc37fdc05",
              "IPY_MODEL_da3665141bd44a24a5b5c9f36d4a9c52"
            ]
          }
        },
        "0566f29b017f47f399d7579d7929e046": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "932309f0a40b46659c0cac7cc37fdc05": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_5c98e3a5b6a6403a936a725f4c30cdd3",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 891691430,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 891691430,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_8da2b560fa9348098a2a7f09967d5f5f"
          }
        },
        "da3665141bd44a24a5b5c9f36d4a9c52": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_7e37cac227014717987922341f8099fe",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 892M/892M [00:38&lt;00:00, 23.2MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_b95f98f98a76434591f90d41b43e39ba"
          }
        },
        "5c98e3a5b6a6403a936a725f4c30cdd3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "8da2b560fa9348098a2a7f09967d5f5f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "7e37cac227014717987922341f8099fe": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "b95f98f98a76434591f90d41b43e39ba": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "8e79d03deee94b299431330441bd64c8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_510043ffee634f86b89ec3fc060a74ea",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_e86c5fbd48ce4215a0df353122183982",
              "IPY_MODEL_bfc3a5a3cf2e49868053db6f1ef7785d"
            ]
          }
        },
        "510043ffee634f86b89ec3fc060a74ea": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "e86c5fbd48ce4215a0df353122183982": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_361a2f79ed89495894d0b09a709f8f32",
            "_dom_classes": [],
            "description": "Validation sanity check: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "info",
            "max": 1,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_f7e53d55f0234627a3b9f2c90eb8682f"
          }
        },
        "bfc3a5a3cf2e49868053db6f1ef7785d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_3584c01b0c5e47dfa373bae29461e94a",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 5/5 [00:01&lt;00:00,  3.50it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_cfd9db6f31474a8189e741bf8fdad6a9"
          }
        },
        "361a2f79ed89495894d0b09a709f8f32": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "f7e53d55f0234627a3b9f2c90eb8682f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "3584c01b0c5e47dfa373bae29461e94a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "cfd9db6f31474a8189e741bf8fdad6a9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "68705cee3df5458fb5145046337d925c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_4cf1613d58bd450780ac95c994686985",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_3ee5f7cf56394175900ebb14ae0b5f9e",
              "IPY_MODEL_9f054dcf926c45459b7aa728493571a0"
            ]
          }
        },
        "4cf1613d58bd450780ac95c994686985": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "3ee5f7cf56394175900ebb14ae0b5f9e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_b52599dda9d94c83891d1c42c5f557e0",
            "_dom_classes": [],
            "description": "Epoch 3:   3%",
            "_model_name": "FloatProgressModel",
            "bar_style": "danger",
            "max": 11694,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 396,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_a1cf907a3bcc4177b1d5dd9edbf30c20"
          }
        },
        "9f054dcf926c45459b7aa728493571a0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_82b29ceeb21c417782e9e29a81eb47ea",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 396/11694 [03:40&lt;1:44:57,  1.79it/s, loss=0.017, v_num=0, val_loss=0.327]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_886260804ffd4e11bc93fb6e098111ab"
          }
        },
        "b52599dda9d94c83891d1c42c5f557e0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "a1cf907a3bcc4177b1d5dd9edbf30c20": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "82b29ceeb21c417782e9e29a81eb47ea": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "886260804ffd4e11bc93fb6e098111ab": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "69f6eb1cb0434128961b5d83529813c5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_6723d50588a248d0ad7bb118de8c3fd5",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_86d71b8233c14252a897ffa29ea6d9df",
              "IPY_MODEL_d01c708e22ab423896271fa79860e7c3"
            ]
          }
        },
        "6723d50588a248d0ad7bb118de8c3fd5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "86d71b8233c14252a897ffa29ea6d9df": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_0e8da5995754472fac5fba1f8b30d107",
            "_dom_classes": [],
            "description": "Validating: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "info",
            "max": 1,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_3dbee77f299f4e14a1698b60d609b8a1"
          }
        },
        "d01c708e22ab423896271fa79860e7c3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_8c4c9025aaae44148591ae6f8bb37347",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 2501/2501 [07:28&lt;00:00,  5.90it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_29e2f2f0914e4dea8117844675b42be5"
          }
        },
        "0e8da5995754472fac5fba1f8b30d107": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "3dbee77f299f4e14a1698b60d609b8a1": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "8c4c9025aaae44148591ae6f8bb37347": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "29e2f2f0914e4dea8117844675b42be5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "0cfc8fa73f164b4fa5ddcbc3f115ef9b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_4559bd35b33f4804b968debaaf316463",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_e403cc7718bf48f1b95150482e083f02",
              "IPY_MODEL_f6248a9db7f2466a9ab3a4fbd214f265"
            ]
          }
        },
        "4559bd35b33f4804b968debaaf316463": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": "row wrap",
            "width": "100%",
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": "inline-flex",
            "left": null
          }
        },
        "e403cc7718bf48f1b95150482e083f02": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_475e5353d31147d3ab156c0e7835684c",
            "_dom_classes": [],
            "description": "Validating: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "info",
            "max": 1,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_c3f65d683c6e4fe18e31ecc305f8d455"
          }
        },
        "f6248a9db7f2466a9ab3a4fbd214f265": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_9b50abad66b44022aa389bc3f312db6b",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 2501/2501 [07:25&lt;00:00,  5.90it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_762b2941ff3e47d89b6e6ce4350bc058"
          }
        },
        "475e5353d31147d3ab156c0e7835684c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "c3f65d683c6e4fe18e31ecc305f8d455": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": "2",
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "9b50abad66b44022aa389bc3f312db6b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "762b2941ff3e47d89b6e6ce4350bc058": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "1597779d89464892885045be715890a8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_8a42468ed6b945e8bfce1803f3ea4452",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_f87eae824cf1492b9555b78648a9f261",
              "IPY_MODEL_6cd0d574b5fd43588b8d492674125218"
            ]
          }
        },
        "8a42468ed6b945e8bfce1803f3ea4452": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f87eae824cf1492b9555b78648a9f261": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_17b25142ac744ba882e2bbd1f42c1db2",
            "_dom_classes": [],
            "description": "100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 626,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 626,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_09185d325ef84c1fad7b07fbd9eeed31"
          }
        },
        "6cd0d574b5fd43588b8d492674125218": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_ba31765789dc46229493674dab21921d",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 626/626 [06:35&lt;00:00,  1.58it/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_a9dd88fb73374e108482b80993b998eb"
          }
        },
        "17b25142ac744ba882e2bbd1f42c1db2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "09185d325ef84c1fad7b07fbd9eeed31": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "ba31765789dc46229493674dab21921d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "a9dd88fb73374e108482b80993b998eb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/patil-suraj/exploring-T5/blob/master/t5_fine_tuning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PJX4vkjj6wYz",
        "colab_type": "code",
        "outputId": "83a8a420-48cd-4d49-bc60-2693268481c6",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 121
        }
      },
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n",
            "\n",
            "Enter your authorization code:\n",
            "··········\n",
            "Mounted at /content/drive\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1V5cInhu42Wk",
        "colab_type": "code",
        "outputId": "5501a5f1-fc49-4df7-f7a0-31cc37647337",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 302
        }
      },
      "source": [
        "!nvidia-smi"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Sat May  9 15:05:25 2020       \n",
            "+-----------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |\n",
            "|-------------------------------+----------------------+----------------------+\n",
            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
            "|===============================+======================+======================|\n",
            "|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   34C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |\n",
            "+-------------------------------+----------------------+----------------------+\n",
            "                                                                               \n",
            "+-----------------------------------------------------------------------------+\n",
            "| Processes:                                                       GPU Memory |\n",
            "|  GPU       PID   Type   Process name                             Usage      |\n",
            "|=============================================================================|\n",
            "|  No running processes found                                                 |\n",
            "+-----------------------------------------------------------------------------+\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "epWcPHhJ7v7j",
        "colab_type": "text"
      },
      "source": [
        "Instal apex if you want to do 16 bit training. You'll probably need to restart the notebook after installing apex"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "k1Xy7ZG-7gHt",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# !export CUDA_HOME=/usr/local/cuda-10.1\n",
        "# !git clone https://github.com/NVIDIA/apex\n",
        "# !pip install -v --no-cache-dir --global-option=\"--cpp_ext\" --global-option=\"--cuda_ext\" ./apex"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "SDVQ04fGRb1v",
        "colab_type": "code",
        "outputId": "11689986-ca27-4ab0-f14d-5ee4f0eba40d",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "!pip install transformers\n",
        "!pip install pytorch_lightning"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Collecting transformers\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/cd/38/c9527aa055241c66c4d785381eaf6f80a28c224cae97daa1f8b183b5fabb/transformers-2.9.0-py3-none-any.whl (635kB)\n",
            "\r\u001b[K     |▌                               | 10kB 20.5MB/s eta 0:00:01\r\u001b[K     |█                               | 20kB 1.7MB/s eta 0:00:01\r\u001b[K     |█▌                              | 30kB 2.3MB/s eta 0:00:01\r\u001b[K     |██                              | 40kB 2.6MB/s eta 0:00:01\r\u001b[K     |██▋                             | 51kB 2.0MB/s eta 0:00:01\r\u001b[K     |███                             | 61kB 2.3MB/s eta 0:00:01\r\u001b[K     |███▋                            | 71kB 2.5MB/s eta 0:00:01\r\u001b[K     |████▏                           | 81kB 2.7MB/s eta 0:00:01\r\u001b[K     |████▋                           | 92kB 3.0MB/s eta 0:00:01\r\u001b[K     |█████▏                          | 102kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████▊                          | 112kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████▏                         | 122kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████▊                         | 133kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████▏                        | 143kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████▊                        | 153kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████▎                       | 163kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████▊                       | 174kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████▎                      | 184kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████▉                      | 194kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████▎                     | 204kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████▉                     | 215kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████▍                    | 225kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████▉                    | 235kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████▍                   | 245kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████▉                   | 256kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████████▍                  | 266kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████               
   | 276kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████▍                 | 286kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████████                 | 296kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████████▌                | 307kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████                | 317kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████▌               | 327kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████████████               | 337kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████████████▌              | 348kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████████              | 358kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████████▋             | 368kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████████████             | 378kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████████████▋            | 389kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████████            | 399kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████████▋           | 409kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████████████████▏          | 419kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████████████████▋          | 430kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████████████▏         | 440kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████████████▊         | 450kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████████████████▏        | 460kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████████████████▊        | 471kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████████████▎       | 481kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████████████▊       | 491kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████████████████████▎      | 501kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████████████████████▊      | 512kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████████████████▎     | 522kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████████████████▉     | 532kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████████████████████▎    | 542kB 2.8MB/s eta 0:00:01\r\u001b[K     
|███████████████████████████▉    | 552kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████████████████▍   | 563kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████████████████▉   | 573kB 2.8MB/s eta 0:00:01\r\u001b[K     |█████████████████████████████▍  | 583kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████████████████████  | 593kB 2.8MB/s eta 0:00:01\r\u001b[K     |██████████████████████████████▍ | 604kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████ | 614kB 2.8MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████▍| 624kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 634kB 2.8MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 645kB 2.8MB/s \n",
            "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n",
            "Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n",
            "Collecting tokenizers==0.7.0\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/14/e5/a26eb4716523808bb0a799fcfdceb6ebf77a18169d9591b2f46a9adb87d9/tokenizers-0.7.0-cp36-cp36m-manylinux1_x86_64.whl (3.8MB)\n",
            "\u001b[K     |████████████████████████████████| 3.8MB 12.8MB/s \n",
            "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n",
            "Collecting sacremoses\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n",
            "\u001b[K     |████████████████████████████████| 890kB 28.2MB/s \n",
            "\u001b[?25hCollecting sentencepiece\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/98/2c/8df20f3ac6c22ac224fff307ebc102818206c53fc454ecd37d8ac2060df5/sentencepiece-0.1.86-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n",
            "\u001b[K     |████████████████████████████████| 1.0MB 42.6MB/s \n",
            "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.4)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.4.5.1)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.9)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.12.0)\n",
            "Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n",
            "Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.14.1)\n",
            "Building wheels for collected packages: sacremoses\n",
            "  Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893260 sha256=1d6422ddbf7526c5762d09193b36548e7b07bf9cc526057f833254f31a68c87c\n",
            "  Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n",
            "Successfully built sacremoses\n",
            "Installing collected packages: tokenizers, sacremoses, sentencepiece, transformers\n",
            "Successfully installed sacremoses-0.0.43 sentencepiece-0.1.86 tokenizers-0.7.0 transformers-2.9.0\n",
            "Collecting pytorch_lightning\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/75/ac/ac03f1f3fa950d96ca52f07d33fdbf5add05f164c1ac4eae179231dfa93d/pytorch_lightning-0.7.5-py3-none-any.whl (233kB)\n",
            "\u001b[K     |████████████████████████████████| 235kB 2.8MB/s \n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.16.4 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (1.18.4)\n",
            "Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (4.41.1)\n",
            "Collecting future>=0.17.1\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)\n",
            "\u001b[K     |████████████████████████████████| 829kB 8.4MB/s \n",
            "\u001b[?25hRequirement already satisfied: tensorboard>=1.14 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (2.2.1)\n",
            "Requirement already satisfied: torch>=1.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (1.5.0+cu101)\n",
            "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.12.0)\n",
            "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.6.0.post3)\n",
            "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.7.2)\n",
            "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (3.2.1)\n",
            "Requirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (0.34.2)\n",
            "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (0.4.1)\n",
            "Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.28.1)\n",
            "Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (3.10.0)\n",
            "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (0.9.0)\n",
            "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (2.23.0)\n",
            "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (46.1.3)\n",
            "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.0.1)\n",
            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (0.2.8)\n",
            "Requirement already satisfied: cachetools<3.2,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (3.1.1)\n",
            "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (4.0)\n",
            "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=1.14->pytorch_lightning) (1.3.0)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (2.9)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (2020.4.5.1)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (1.24.3)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (3.0.4)\n",
            "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (0.4.8)\n",
            "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=1.14->pytorch_lightning) (3.1.0)\n",
            "Building wheels for collected packages: future\n",
            "  Building wheel for future (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for future: filename=future-0.18.2-cp36-none-any.whl size=491057 sha256=2748e4b7f9acd3e1e87b8118cdcb4cb5a4bf5ed682d99f3866e26265ab336042\n",
            "  Stored in directory: /root/.cache/pip/wheels/8b/99/a0/81daf51dcd359a9377b110a8a886b3895921802d2fc1b2397e\n",
            "Successfully built future\n",
            "Installing collected packages: future, pytorch-lightning\n",
            "  Found existing installation: future 0.16.0\n",
            "    Uninstalling future-0.16.0:\n",
            "      Successfully uninstalled future-0.16.0\n",
            "Successfully installed future-0.18.2 pytorch-lightning-0.7.5\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "HVxGfmEMCKs_",
        "colab_type": "text"
      },
      "source": [
        "## T5 fine-tuning\n",
        "\n",
        "This notebook is to showcase how to fine-tune [T5 model](https://arxiv.org/abs/1910.10683) with Huggigface's [Transformers](https://github.com/huggingface/transformers/) to solve different NLP tasks using text-2-text approach proposed in the T5 paper. For demo I chose 3 non text-2-text problems just to reiterate the fact from the paper that how widely applicable this text-2-text framework is and how it can be used for different tasks without changing the model at all.\n",
        "\n",
        "This is a rough draft so if you find any issues with this notebook or have any  questions reach out to me via [Twitter](https://twitter.com/psuraj28).\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HS8mNXq6bdxq",
        "colab_type": "code",
        "outputId": "b0a32f10-f2ef-4d49-b433-266e8206040b",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 84
        }
      },
      "source": [
        "import argparse\n",
        "import glob\n",
        "import os\n",
        "import json\n",
        "import time\n",
        "import logging\n",
        "import random\n",
        "import re\n",
        "from itertools import chain\n",
        "from string import punctuation\n",
        "\n",
        "import nltk\n",
        "nltk.download('punkt')\n",
        "from nltk.tokenize import sent_tokenize\n",
        "\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "import torch\n",
        "from torch.utils.data import Dataset, DataLoader\n",
        "import pytorch_lightning as pl\n",
        "\n",
        "\n",
        "from transformers import (\n",
        "    AdamW,\n",
        "    T5ForConditionalGeneration,\n",
        "    T5Tokenizer,\n",
        "    get_linear_schedule_with_warmup\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
            "[nltk_data]   Unzipping tokenizers/punkt.zip.\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.file_utils:PyTorch version 1.5.0+cu101 available.\n",
            "INFO:transformers.file_utils:TensorFlow version 2.2.0-rc4 available.\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IswYuhWaz7QJ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def set_seed(seed):\n",
        "  random.seed(seed)\n",
        "  np.random.seed(seed)\n",
        "  torch.manual_seed(seed)\n",
        "  if torch.cuda.is_available():\n",
        "    torch.cuda.manual_seed_all(seed)\n",
        "\n",
        "set_seed(42)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RKNr7fgzcKpZ",
        "colab_type": "text"
      },
      "source": [
        "## Model\n",
        "\n",
        "We'll be using the awesome [pytorch-lightning](https://github.com/PytorchLightning/pytorch-lightning) library for training. Most of the below code is adapted from here https://github.com/huggingface/transformers/blob/master/examples/lightning_base.py\n",
        "\n",
        "The trainer is generic and can be used for any text-2-text task. You'll just need to change the dataset. Rest of the code will stay unchanged for all the tasks.\n",
        "\n",
        "This is the most intresting and powrfull thing about the text-2-text format. You can fine-tune the model on variety of NLP tasks by just formulating the problem in text-2-text setting. No need to change hyperparameters, learning rate, optimizer or loss function. Just plug in your dataset and you are ready to go!"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "B7uVNBtXST5X",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "class T5FineTuner(pl.LightningModule):\n",
        "  def __init__(self, hparams):\n",
        "    super(T5FineTuner, self).__init__()\n",
        "    self.hparams = hparams\n",
        "    \n",
        "    self.model = T5ForConditionalGeneration.from_pretrained(hparams.model_name_or_path)\n",
        "    self.tokenizer = T5Tokenizer.from_pretrained(hparams.tokenizer_name_or_path)\n",
        "  \n",
        "  def is_logger(self):\n",
        "    return self.trainer.proc_rank <= 0\n",
        "  \n",
        "  def forward(\n",
        "      self, input_ids, attention_mask=None, decoder_input_ids=None, decoder_attention_mask=None, lm_labels=None\n",
        "  ):\n",
        "    return self.model(\n",
        "        input_ids,\n",
        "        attention_mask=attention_mask,\n",
        "        decoder_input_ids=decoder_input_ids,\n",
        "        decoder_attention_mask=decoder_attention_mask,\n",
        "        lm_labels=lm_labels,\n",
        "    )\n",
        "\n",
        "  def _step(self, batch):\n",
        "    lm_labels = batch[\"target_ids\"]\n",
        "    lm_labels[lm_labels[:, :] == self.tokenizer.pad_token_id] = -100\n",
        "\n",
        "    outputs = self(\n",
        "        input_ids=batch[\"source_ids\"],\n",
        "        attention_mask=batch[\"source_mask\"],\n",
        "        lm_labels=lm_labels,\n",
        "        decoder_attention_mask=batch['target_mask']\n",
        "    )\n",
        "\n",
        "    loss = outputs[0]\n",
        "\n",
        "    return loss\n",
        "\n",
        "  def training_step(self, batch, batch_idx):\n",
        "    loss = self._step(batch)\n",
        "\n",
        "    tensorboard_logs = {\"train_loss\": loss}\n",
        "    return {\"loss\": loss, \"log\": tensorboard_logs}\n",
        "  \n",
        "  def training_epoch_end(self, outputs):\n",
        "    avg_train_loss = torch.stack([x[\"loss\"] for x in outputs]).mean()\n",
        "    tensorboard_logs = {\"avg_train_loss\": avg_train_loss}\n",
        "    return {\"avg_train_loss\": avg_train_loss, \"log\": tensorboard_logs, 'progress_bar': tensorboard_logs}\n",
        "\n",
        "  def validation_step(self, batch, batch_idx):\n",
        "    loss = self._step(batch)\n",
        "    return {\"val_loss\": loss}\n",
        "  \n",
        "  def validation_epoch_end(self, outputs):\n",
        "    avg_loss = torch.stack([x[\"val_loss\"] for x in outputs]).mean()\n",
        "    tensorboard_logs = {\"val_loss\": avg_loss}\n",
        "    return {\"avg_val_loss\": avg_loss, \"log\": tensorboard_logs, 'progress_bar': tensorboard_logs}\n",
        "\n",
        "  def configure_optimizers(self):\n",
        "    \"Prepare optimizer and schedule (linear warmup and decay)\"\n",
        "\n",
        "    model = self.model\n",
        "    no_decay = [\"bias\", \"LayerNorm.weight\"]\n",
        "    optimizer_grouped_parameters = [\n",
        "        {\n",
        "            \"params\": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],\n",
        "            \"weight_decay\": self.hparams.weight_decay,\n",
        "        },\n",
        "        {\n",
        "            \"params\": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],\n",
        "            \"weight_decay\": 0.0,\n",
        "        },\n",
        "    ]\n",
        "    optimizer = AdamW(optimizer_grouped_parameters, lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)\n",
        "    self.opt = optimizer\n",
        "    return [optimizer]\n",
        "  \n",
        "  def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, second_order_closure=None):\n",
        "    if self.trainer.use_tpu:\n",
        "      xm.optimizer_step(optimizer)\n",
        "    else:\n",
        "      optimizer.step()\n",
        "    optimizer.zero_grad()\n",
        "    self.lr_scheduler.step()\n",
        "  \n",
        "  def get_tqdm_dict(self):\n",
        "    tqdm_dict = {\"loss\": \"{:.3f}\".format(self.trainer.avg_loss), \"lr\": self.lr_scheduler.get_last_lr()[-1]}\n",
        "\n",
        "    return tqdm_dict\n",
        "\n",
        "  def train_dataloader(self):\n",
        "    train_dataset = get_dataset(tokenizer=self.tokenizer, type_path=\"train\", args=self.hparams)\n",
        "    dataloader = DataLoader(train_dataset, batch_size=self.hparams.train_batch_size, drop_last=True, shuffle=True, num_workers=4)\n",
        "    t_total = (\n",
        "        (len(dataloader.dataset) // (self.hparams.train_batch_size * max(1, self.hparams.n_gpu)))\n",
        "        // self.hparams.gradient_accumulation_steps\n",
        "        * float(self.hparams.num_train_epochs)\n",
        "    )\n",
        "    scheduler = get_linear_schedule_with_warmup(\n",
        "        self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=t_total\n",
        "    )\n",
        "    self.lr_scheduler = scheduler\n",
        "    return dataloader\n",
        "\n",
        "  def val_dataloader(self):\n",
        "    val_dataset = get_dataset(tokenizer=self.tokenizer, type_path=\"val\", args=self.hparams)\n",
        "    return DataLoader(val_dataset, batch_size=self.hparams.eval_batch_size, num_workers=4)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "oh1R5C-GwMqx",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "logger = logging.getLogger(__name__)\n",
        "\n",
        "class LoggingCallback(pl.Callback):\n",
        "  \"\"\"Callback that reports `trainer.callback_metrics` through the module-level logger.\"\"\"\n",
        "\n",
        "  def on_validation_end(self, trainer, pl_module):\n",
        "    \"\"\"Log every validation metric except the \"log\" and \"progress_bar\" entries.\"\"\"\n",
        "    logger.info(\"***** Validation results *****\")\n",
        "    if not pl_module.is_logger():\n",
        "      return\n",
        "\n",
        "    metrics = trainer.callback_metrics\n",
        "    for name in sorted(metrics):\n",
        "      if name in (\"log\", \"progress_bar\"):\n",
        "        continue\n",
        "      logger.info(\"{} = {}\\n\".format(name, str(metrics[name])))\n",
        "\n",
        "  def on_test_end(self, trainer, pl_module):\n",
        "    \"\"\"Log the test metrics and write them to `test_results.txt` inside `hparams.output_dir`.\"\"\"\n",
        "    logger.info(\"***** Test results *****\")\n",
        "    if not pl_module.is_logger():\n",
        "      return\n",
        "\n",
        "    metrics = trainer.callback_metrics\n",
        "    output_test_results_file = os.path.join(pl_module.hparams.output_dir, \"test_results.txt\")\n",
        "    with open(output_test_results_file, \"w\") as writer:\n",
        "      for name in sorted(metrics):\n",
        "        if name in (\"log\", \"progress_bar\"):\n",
        "          continue\n",
        "        # The same text goes to the log and to the results file\n",
        "        entry = \"{} = {}\\n\".format(name, str(metrics[name]))\n",
        "        logger.info(entry)\n",
        "        writer.write(entry)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "a4hjvsBJ5Zk5",
        "colab_type": "text"
      },
      "source": [
        "Let's define the hyperparameters and other arguments. You can override this `dict` for a specific task as needed; in most cases you'll only need to change `data_dir` and `output_dir`.\n",
        "\n",
        "Here the batch size is 8 and `gradient_accumulation_steps` is 16, so the effective batch size is 128."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "urduopvizqTq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Default hyperparameters and paths; task sections override individual entries\n",
        "args_dict = {\n",
        "    \"data_dir\": \"\", # path for data files\n",
        "    \"output_dir\": \"\", # path to save the checkpoints\n",
        "    \"model_name_or_path\": 't5-base',\n",
        "    \"tokenizer_name_or_path\": 't5-base',\n",
        "    \"max_seq_length\": 512,\n",
        "    \"learning_rate\": 3e-4,\n",
        "    \"weight_decay\": 0.0,\n",
        "    \"adam_epsilon\": 1e-8,\n",
        "    \"warmup_steps\": 0,\n",
        "    \"train_batch_size\": 8,\n",
        "    \"eval_batch_size\": 8,\n",
        "    \"num_train_epochs\": 2,\n",
        "    \"gradient_accumulation_steps\": 16,\n",
        "    \"n_gpu\": 1,\n",
        "    \"early_stop_callback\": False,\n",
        "    \"fp_16\": False, # if you want to enable 16-bit training then install apex and set this to true\n",
        "    \"opt_level\": 'O1', # you can find out more on optimisation levels here https://nvidia.github.io/apex/amp.html#opt-levels-and-properties\n",
        "    \"max_grad_norm\": 1.0, # if you enable 16-bit training then set this to a sensible value, 0.5 is a good default\n",
        "    \"seed\": 42,\n",
        "}"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "vfhlYUUV2NIh",
        "colab_type": "text"
      },
      "source": [
        "## IMDB review classification"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "b3C13iabZvwK",
        "colab_type": "text"
      },
      "source": [
        "### Download IMDB Data"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7R0QdcgXuIWW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# -nc: skip the download when the archive is already present, so the cell can be re-run\n",
        "!wget -nc https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n",
        "# no -v: do not print one line per extracted file\n",
        "!tar -xf aclImdb_v1.tar.gz"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ni1cAK7EvXSB",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# glob returns paths in arbitrary order; sort them so that a seeded shuffle\n",
        "# later on selects the same files on every machine\n",
        "train_pos_files = sorted(glob.glob('aclImdb/train/pos/*.txt'))\n",
        "train_neg_files = sorted(glob.glob('aclImdb/train/neg/*.txt'))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jEsRn5pa0v8d",
        "colab_type": "code",
        "outputId": "6977ce56-d0b4-4d9f-8548-22003bb07eaf",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "len(train_pos_files), len(train_neg_files)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(12500, 12500)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 10
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5zgS8KhlaPiA",
        "colab_type": "text"
      },
      "source": [
        "We will use 2000 samples from the train set for validation. Let's choose 1000 positive reviews and 1000 negative reviews for validation and save them in the val directory."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hLvBHcXwzXrk",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "!mkdir -p aclImdb/val/pos aclImdb/val/neg"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IXZmLZ1pzjiY",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Shuffle both file lists in place so the held-out files are picked at random\n",
        "random.shuffle(train_pos_files)\n",
        "random.shuffle(train_neg_files)\n",
        "\n",
        "# The first 1000 shuffled files of each class become the validation set\n",
        "val_pos_files = train_pos_files[:1000]\n",
        "val_neg_files = train_neg_files[:1000]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5yTS2Jx40UNu",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import shutil"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hJnJpkdb0ZKY",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Move the held-out review files out of the train directories into val/pos and val/neg\n",
        "for f in val_pos_files:\n",
        "  shutil.move(f,  'aclImdb/val/pos')\n",
        "for f in val_neg_files:\n",
        "  shutil.move(f,  'aclImdb/val/neg')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "qdEgCwL7cIyi",
        "colab_type": "text"
      },
      "source": [
        "### Prepare Dataset"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "McQC1FotigqA",
        "colab_type": "code",
        "outputId": "f60dbf68-32cf-44e1-9a2f-f9dba38cbbac",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 186,
          "referenced_widgets": [
            "7d8f60bfc0a248e58028b6e8a477a5f7",
            "72dc1e39b931429883e68c0603797896",
            "cde60c5e18f04ba792fff8c2ac33f470",
            "c0c0df12695b4a1eacf8fa4ccc0ac62c",
            "72ea881ce3f445a9983d858b76dd257b",
            "d0f0c28a14b242f8990a547ed7f87c04",
            "f97741534b554be3b5cdccd45c73b317",
            "1e70a3dc7090487fa883e932bff395cb"
          ]
        }
      },
      "source": [
        "tokenizer = T5Tokenizer.from_pretrained('t5-base')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:filelock:Lock 139780871368544 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n",
            "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpgy9lk1eo\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "7d8f60bfc0a248e58028b6e8a477a5f7",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n",
            "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n",
            "INFO:filelock:Lock 139780871368544 released on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n",
            "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "wthd9SM74RG8",
        "colab_type": "code",
        "outputId": "52deb6bd-19c4-4071-8bcb-254925d8e4cc",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Encode each label with the eos marker appended and display how many ids each one produces\n",
        "ids_neg = tokenizer.encode('negative </s>')\n",
        "ids_pos = tokenizer.encode('positive </s>')\n",
        "len(ids_neg), len(ids_pos)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(2, 2)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 21
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "k5sJkyI3a723",
        "colab_type": "text"
      },
      "source": [
        "All the examples are converted to the text-2-text format as shown in the paper. However, I didn't use any task prefix here. The examples are encoded as follows:\n",
        "if the review is positive then the target is 'positive', otherwise it is 'negative'.\n",
        "\n",
        "**input**:  I went to see this\n",
        "movie with my husband, and we both\n",
        "thought the acting was terrible!\n",
        "\n",
        "**target**: negative\n",
        "\n",
        "**input**:  Despite what others say,\n",
        "I thought this movie was funny.\n",
        "\n",
        "**target**: positive"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "VEYmYHKGcxEq",
        "colab_type": "text"
      },
      "source": [
        "The dataset below takes care of reading the review files and processing the examples in text-2-text format.\n",
        "\n",
        "It cleans the review text by removing the HTML tags. It also appends the eos token `</s>` at the end of the input and the target, as required by the T5 model.\n",
        "\n",
        "For T5 the max input length is 512, and we can choose the max length of the target sequence depending on our dataset. The `T5Tokenizer` encodes both 'positive' and 'negative' as a single id, so I chose a max target length of 2: one id for the label plus one for the `</s>` token."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IIY0GenSb72m",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "class ImdbDataset(Dataset):\n",
        "  \"\"\"IMDB sentiment reviews converted to text-2-text examples.\n",
        "\n",
        "  Every ``*.txt`` file under ``<data_dir>/<type_path>/pos`` and\n",
        "  ``<data_dir>/<type_path>/neg`` is read, cleaned, suffixed with ``</s>`` and\n",
        "  tokenized once, up front, while the dataset is constructed.\n",
        "\n",
        "  Args:\n",
        "    tokenizer: tokenizer exposing ``batch_encode_plus``.\n",
        "    data_dir: root directory that contains the split directories.\n",
        "    type_path: name of the split directory, e.g. ``\"train\"`` or ``\"val\"``.\n",
        "    max_len: ``max_length`` passed to the tokenizer for the review text.\n",
        "  \"\"\"\n",
        "\n",
        "  # Punctuation that is deleted without leaving a gap\n",
        "  REPLACE_NO_SPACE = re.compile(r\"[.;:!'?,\\\"()\\[\\]]\")\n",
        "  # Double line-break tags, hyphens and slashes separate words: each becomes one space\n",
        "  REPLACE_WITH_SPACE = re.compile(r\"(<br\\s*/><br\\s*/>)|(\\-)|(\\/)\")\n",
        "\n",
        "  def __init__(self, tokenizer, data_dir, type_path,  max_len=512):\n",
        "    self.pos_file_path = os.path.join(data_dir, type_path, 'pos')\n",
        "    self.neg_file_path = os.path.join(data_dir, type_path, 'neg')\n",
        "\n",
        "    # glob returns paths in arbitrary order; sort so example indices are reproducible\n",
        "    self.pos_files = sorted(glob.glob(\"%s/*.txt\" % self.pos_file_path))\n",
        "    self.neg_files = sorted(glob.glob(\"%s/*.txt\" % self.neg_file_path))\n",
        "\n",
        "    self.max_len = max_len\n",
        "    self.tokenizer = tokenizer\n",
        "    self.inputs = []\n",
        "    self.targets = []\n",
        "\n",
        "    self._build()\n",
        "\n",
        "  def __len__(self):\n",
        "    \"\"\"Return the number of tokenized examples.\"\"\"\n",
        "    return len(self.inputs)\n",
        "\n",
        "  def __getitem__(self, index):\n",
        "    \"\"\"Return the squeezed ids and attention masks of the input and target at ``index``.\"\"\"\n",
        "    source_ids = self.inputs[index][\"input_ids\"].squeeze()\n",
        "    target_ids = self.targets[index][\"input_ids\"].squeeze()\n",
        "\n",
        "    src_mask = self.inputs[index][\"attention_mask\"].squeeze()\n",
        "    target_mask = self.targets[index][\"attention_mask\"].squeeze()\n",
        "\n",
        "    return {\"source_ids\": source_ids, \"source_mask\": src_mask, \"target_ids\": target_ids, \"target_mask\": target_mask}\n",
        "\n",
        "  def _build(self):\n",
        "    \"\"\"Tokenize all positive reviews first, then all negative ones.\"\"\"\n",
        "    self._build_examples_from_files(self.pos_files, 'positive')\n",
        "    self._build_examples_from_files(self.neg_files, 'negative')\n",
        "\n",
        "  def _build_examples_from_files(self, files, sentiment):\n",
        "    \"\"\"Read, clean and tokenize each file, appending to ``self.inputs`` and ``self.targets``.\n",
        "\n",
        "    Args:\n",
        "      files: paths of the review files to read.\n",
        "      sentiment: target text shared by all of these files.\n",
        "    \"\"\"\n",
        "    target = sentiment + \" </s>\"\n",
        "\n",
        "    for path in files:\n",
        "      # Read as UTF-8 explicitly instead of relying on the platform default encoding\n",
        "      with open(path, 'r', encoding='utf-8') as f:\n",
        "        text = f.read()\n",
        "\n",
        "      line = text.strip()\n",
        "      line = self.REPLACE_NO_SPACE.sub(\"\", line)\n",
        "      # Substitute a space (not \"\"), otherwise the words on both sides are glued together\n",
        "      line = self.REPLACE_WITH_SPACE.sub(\" \", line)\n",
        "      line = line + ' </s>'\n",
        "\n",
        "      # tokenize inputs\n",
        "      tokenized_inputs = self.tokenizer.batch_encode_plus(\n",
        "          [line], max_length=self.max_len, pad_to_max_length=True, return_tensors=\"pt\"\n",
        "      )\n",
        "      # tokenize targets (max_length=2: one id for the label plus one for </s>)\n",
        "      tokenized_targets = self.tokenizer.batch_encode_plus(\n",
        "          [target], max_length=2, pad_to_max_length=True, return_tensors=\"pt\"\n",
        "      )\n",
        "\n",
        "      self.inputs.append(tokenized_inputs)\n",
        "      self.targets.append(tokenized_targets)\n",
        "\n",
        "  # Backward-compatible alias for the original (misspelled) method name\n",
        "  _buil_examples_from_files = _build_examples_from_files"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gsnsKY6jemsr",
        "colab_type": "code",
        "outputId": "98885b84-7f65-4d79-b470-619def772505",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "dataset = ImdbDataset(tokenizer, 'aclImdb', 'val',  max_len=512)\n",
        "len(dataset)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "2000"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 23
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7g1gz05ccAzg",
        "colab_type": "code",
        "outputId": "b3a263f1-8b22-46bf-9a33-f58c996d684a",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 70
        }
      },
      "source": [
        "data = dataset[28]\n",
        "print(tokenizer.decode(data['source_ids']))\n",
        "print(tokenizer.decode(data['target_ids']))"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "To quote Flik that was my reaction exactly Wowyoure perfect This is the best movie I think I can even say its become my favorite movie ever even Wow I tell you what wow\n",
            "positive\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "W4cfw8bMcNdA",
        "colab_type": "text"
      },
      "source": [
        "### Train"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "aTvkv4rzhPjy",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "!mkdir -p t5_imdb_sentiment"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "r5ngAP4OXFqZ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Point the shared defaults at the IMDB data and output directories\n",
        "args_dict.update({'data_dir': 'aclImdb', 'output_dir': 't5_imdb_sentiment', 'num_train_epochs':2})\n",
        "args = argparse.Namespace(**args_dict)\n",
        "\n",
        "# Checkpoints are ranked by val_loss (lower is better); the best 5 are kept\n",
        "checkpoint_callback = pl.callbacks.ModelCheckpoint(\n",
        "    filepath=args.output_dir, prefix=\"checkpoint\", monitor=\"val_loss\", mode=\"min\", save_top_k=5\n",
        ")\n",
        "\n",
        "# Keyword arguments for pl.Trainer, all derived from args\n",
        "train_params = dict(\n",
        "    accumulate_grad_batches=args.gradient_accumulation_steps,\n",
        "    gpus=args.n_gpu,\n",
        "    max_epochs=args.num_train_epochs,\n",
        "    early_stop_callback=args.early_stop_callback,  # honour the args_dict setting instead of hard-coding False\n",
        "    precision= 16 if args.fp_16 else 32,\n",
        "    amp_level=args.opt_level,\n",
        "    gradient_clip_val=args.max_grad_norm,\n",
        "    checkpoint_callback=checkpoint_callback,\n",
        "    callbacks=[LoggingCallback()],\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RJt_VqzEAMUg",
        "colab_type": "text"
      },
      "source": [
        "Define the `get_dataset` function to return the dataset. The model calls this function to get the train and val datasets. We define a dataset function so that we won't need to modify the model code at all: redefine the function to return a different dataset according to the problem. This is not the best solution, but it works for now."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2h2aGPgp0vOf",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def get_dataset(tokenizer, type_path, args):\n",
        "  \"\"\"Build the ImdbDataset for split `type_path`, taking `data_dir` and `max_seq_length` from `args`.\"\"\"\n",
        "  dataset_kwargs = dict(\n",
        "      tokenizer=tokenizer,\n",
        "      data_dir=args.data_dir,\n",
        "      type_path=type_path,\n",
        "      max_len=args.max_seq_length,\n",
        "  )\n",
        "  return ImdbDataset(**dataset_kwargs)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4IOQpawZA9XC",
        "colab_type": "text"
      },
      "source": [
        "**Initialize model**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "kJsz3a4SilAF",
        "colab_type": "code",
        "outputId": "d711c5a7-4c7d-4392-8cf5-3df1cbcf2859",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "f414bac332054c7f86af89b8e50c7d73",
            "1d9c52a1bb8843b6b0f151571cbf30a4",
            "ed039b8125714030b03912fb29a93ca4",
            "d9b445b8b3b04569adf22429259b4954",
            "6c61b3c76d7045eb825172ba51b3fa63",
            "d11ffd1efc024c1ca86276430d29fd1e",
            "22fac35d924f464ca0b33be21a566a86",
            "cfe128b0d2c648c18d2255b3f8506a09",
            "c34ac6d2548249819c1eab28956edec4",
            "de2c77b3fb0f4dba99f92062b2db5328",
            "6ea23f0979824aac935f3f1ad10a86cd",
            "6452bc3b5ad445a8a5e272207fe4504d",
            "d6ef508766c54f8993d1d1f3d7cac040",
            "1b69bbddeb244defab9e21690a45c79e",
            "4a2b56fd6780470ab1574509fa432183",
            "3853231cd966465882a93fad9c5dc428"
          ]
        }
      },
      "source": [
        "model = T5FineTuner(args)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:filelock:Lock 139780702227256 acquired on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\n",
            "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp5_6vo8c2\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "f414bac332054c7f86af89b8e50c7d73",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1199.0, style=ProgressStyle(description…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json in cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n",
            "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n",
            "INFO:filelock:Lock 139780702227256 released on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\n",
            "INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n",
            "INFO:transformers.configuration_utils:Model config T5Config {\n",
            "  \"architectures\": [\n",
            "    \"T5WithLMHeadModel\"\n",
            "  ],\n",
            "  \"d_ff\": 3072,\n",
            "  \"d_kv\": 64,\n",
            "  \"d_model\": 768,\n",
            "  \"decoder_start_token_id\": 0,\n",
            "  \"dropout_rate\": 0.1,\n",
            "  \"eos_token_id\": 1,\n",
            "  \"initializer_factor\": 1.0,\n",
            "  \"is_encoder_decoder\": true,\n",
            "  \"layer_norm_epsilon\": 1e-06,\n",
            "  \"model_type\": \"t5\",\n",
            "  \"n_positions\": 512,\n",
            "  \"num_heads\": 12,\n",
            "  \"num_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": 0,\n",
            "  \"relative_attention_num_buckets\": 32,\n",
            "  \"task_specific_params\": {\n",
            "    \"summarization\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"length_penalty\": 2.0,\n",
            "      \"max_length\": 200,\n",
            "      \"min_length\": 30,\n",
            "      \"no_repeat_ngram_size\": 3,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"summarize: \"\n",
            "    },\n",
            "    \"translation_en_to_de\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"max_length\": 300,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"translate English to German: \"\n",
            "    },\n",
            "    \"translation_en_to_fr\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"max_length\": 300,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"translate English to French: \"\n",
            "    },\n",
            "    \"translation_en_to_ro\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"max_length\": 300,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"translate English to Romanian: \"\n",
            "    }\n",
            "  },\n",
            "  \"vocab_size\": 32128\n",
            "}\n",
            "\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:filelock:Lock 139780702189776 acquired on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\n",
            "INFO:transformers.file_utils:https://cdn.huggingface.co/t5-base-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmps92w5ati\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "c34ac6d2548249819c1eab28956edec4",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891691430.0, style=ProgressStyle(descri…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.file_utils:storing https://cdn.huggingface.co/t5-base-pytorch_model.bin in cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n",
            "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n",
            "INFO:filelock:Lock 139780702189776 released on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\n",
            "INFO:transformers.modeling_utils:loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.modeling_utils:Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\n",
            "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RSJytKv1BFyc",
        "colab_type": "text"
      },
      "source": [
        "**Initialize trainer**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PxO8OTA3irbw",
        "colab_type": "code",
        "outputId": "6ebd7f3f-09fe-4363-9869-24d39183d2ff",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 50
        }
      },
      "source": [
        "trainer = pl.Trainer(**train_params)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:lightning:GPU available: True, used: True\n",
            "INFO:lightning:CUDA_VISIBLE_DEVICES: [0]\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Wo7cSSvFGEhe",
        "colab_type": "text"
      },
      "source": [
        "**start fine-tuning**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hVGd6imfizLP",
        "colab_type": "code",
        "outputId": "cca18a5f-7900-4f58-ed74-6684b72a54e1",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "915a0b65612243668570c555a47a6c37",
            "c85b348624504af294b78de744969493",
            "d56a6918840e4f6588af5da5f8f54015",
            "41db48cf488a4522b1f04b33c2261262",
            "8c2d9ac8c22f486299949f4cbed16437",
            "222974dba69145e7b171360bec239ba5",
            "9e95200811bb497ab0ac0229f5e0ddaa",
            "3773b14f23974ad3a5bbb7ff947e68ca",
            "3ec26f803d124dd0877e1ce0e3517f68",
            "aabb0b2f2ae64684a80f1ea39c9a7d1b",
            "885696e0606c4353a5d21feec03aebc7",
            "659dd7302f3a40038834c4f1d8e59250",
            "6f3859c80aa945e4b4ae2aa957755b7c",
            "a840a738d20b4f43baf18453db53fdf0",
            "f7139c4e04374ffbafe6a849500c6369",
            "ef8f0b7c9b0c4f829e3ad59e83cbdd67",
            "dbe7a4854b8f420faaea8de4583fb1f0",
            "4d1f674483d44e559ae1de553dd1d726",
            "ce506c0137914e4db93b9db35154c62a",
            "e92a181ff64d4e0290236a91cbdb8d67",
            "e8f7179c238e4d2d91d456b2c07e1b3e",
            "e67100d71b5047158ab48ef0fd36cb99",
            "17f7e321de81404dabaa3e84fadce2cf",
            "a15e2fcc467242cb9fad5b2082a70c39",
            "f40c9bf16c9a473ba758a6439dce2652",
            "8d17a251bf1440d4aa8513ad5f15ba1d",
            "165319529b364183ae344a9a14f5bc52",
            "3d0c08f3abbe421d83f2b35583221291",
            "6e851577f682494c894b9afdd07b1201",
            "e67e9e945a9c430f9844946cd81aae3a",
            "34fbc6e29df046faaedd9fe3230559cb",
            "bbbdd81a2e8f4d68b33d698f45ccc9ae"
          ]
        }
      },
      "source": [
        "trainer.fit(model)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:lightning:\n",
            "    | Name                                                                  | Type                       | Params\n",
            "-----------------------------------------------------------------------------------------------------------------\n",
            "0   | model                                                                 | T5ForConditionalGeneration | 222 M \n",
            "1   | model.shared                                                          | Embedding                  | 24 M  \n",
            "2   | model.encoder                                                         | T5Stack                    | 109 M \n",
            "3   | model.encoder.block                                                   | ModuleList                 | 84 M  \n",
            "4   | model.encoder.block.0                                                 | T5Block                    | 7 M   \n",
            "5   | model.encoder.block.0.layer                                           | ModuleList                 | 7 M   \n",
            "6   | model.encoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "7   | model.encoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "8   | model.encoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "9   | model.encoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "10  | model.encoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "11  | model.encoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "12  | model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \n",
            "13  | model.encoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "14  | model.encoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \n",
            "15  | model.encoder.block.0.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "16  | model.encoder.block.0.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "17  | model.encoder.block.0.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "18  | model.encoder.block.0.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "19  | model.encoder.block.0.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "20  | model.encoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "21  | model.encoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \n",
            "22  | model.encoder.block.1                                                 | T5Block                    | 7 M   \n",
            "23  | model.encoder.block.1.layer                                           | ModuleList                 | 7 M   \n",
            "24  | model.encoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "25  | model.encoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "26  | model.encoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "27  | model.encoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "28  | model.encoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "29  | model.encoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "30  | model.encoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "31  | model.encoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \n",
            "32  | model.encoder.block.1.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "33  | model.encoder.block.1.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "34  | model.encoder.block.1.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "35  | model.encoder.block.1.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "36  | model.encoder.block.1.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "37  | model.encoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "38  | model.encoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \n",
            "39  | model.encoder.block.2                                                 | T5Block                    | 7 M   \n",
            "40  | model.encoder.block.2.layer                                           | ModuleList                 | 7 M   \n",
            "41  | model.encoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "42  | model.encoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "43  | model.encoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "44  | model.encoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "45  | model.encoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "46  | model.encoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "47  | model.encoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "48  | model.encoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \n",
            "49  | model.encoder.block.2.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "50  | model.encoder.block.2.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "51  | model.encoder.block.2.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "52  | model.encoder.block.2.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "53  | model.encoder.block.2.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "54  | model.encoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "55  | model.encoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \n",
            "56  | model.encoder.block.3                                                 | T5Block                    | 7 M   \n",
            "57  | model.encoder.block.3.layer                                           | ModuleList                 | 7 M   \n",
            "58  | model.encoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "59  | model.encoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "60  | model.encoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "61  | model.encoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "62  | model.encoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "63  | model.encoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "64  | model.encoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "65  | model.encoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \n",
            "66  | model.encoder.block.3.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "67  | model.encoder.block.3.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "68  | model.encoder.block.3.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "69  | model.encoder.block.3.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "70  | model.encoder.block.3.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "71  | model.encoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "72  | model.encoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \n",
            "73  | model.encoder.block.4                                                 | T5Block                    | 7 M   \n",
            "74  | model.encoder.block.4.layer                                           | ModuleList                 | 7 M   \n",
            "75  | model.encoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "76  | model.encoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "77  | model.encoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "78  | model.encoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "79  | model.encoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "80  | model.encoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "81  | model.encoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "82  | model.encoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \n",
            "83  | model.encoder.block.4.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "84  | model.encoder.block.4.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "85  | model.encoder.block.4.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "86  | model.encoder.block.4.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "87  | model.encoder.block.4.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "88  | model.encoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "89  | model.encoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \n",
            "90  | model.encoder.block.5                                                 | T5Block                    | 7 M   \n",
            "91  | model.encoder.block.5.layer                                           | ModuleList                 | 7 M   \n",
            "92  | model.encoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "93  | model.encoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "94  | model.encoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "95  | model.encoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "96  | model.encoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "97  | model.encoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "98  | model.encoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "99  | model.encoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \n",
            "100 | model.encoder.block.5.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "101 | model.encoder.block.5.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "102 | model.encoder.block.5.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "103 | model.encoder.block.5.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "104 | model.encoder.block.5.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "105 | model.encoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "106 | model.encoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \n",
            "107 | model.encoder.block.6                                                 | T5Block                    | 7 M   \n",
            "108 | model.encoder.block.6.layer                                           | ModuleList                 | 7 M   \n",
            "109 | model.encoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "110 | model.encoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "111 | model.encoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "112 | model.encoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "113 | model.encoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "114 | model.encoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "115 | model.encoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "116 | model.encoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \n",
            "117 | model.encoder.block.6.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "118 | model.encoder.block.6.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "119 | model.encoder.block.6.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "120 | model.encoder.block.6.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "121 | model.encoder.block.6.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "122 | model.encoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "123 | model.encoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \n",
            "124 | model.encoder.block.7                                                 | T5Block                    | 7 M   \n",
            "125 | model.encoder.block.7.layer                                           | ModuleList                 | 7 M   \n",
            "126 | model.encoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "127 | model.encoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "128 | model.encoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "129 | model.encoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "130 | model.encoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "131 | model.encoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "132 | model.encoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "133 | model.encoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \n",
            "134 | model.encoder.block.7.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "135 | model.encoder.block.7.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "136 | model.encoder.block.7.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "137 | model.encoder.block.7.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "138 | model.encoder.block.7.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "139 | model.encoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "140 | model.encoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \n",
            "141 | model.encoder.block.8                                                 | T5Block                    | 7 M   \n",
            "142 | model.encoder.block.8.layer                                           | ModuleList                 | 7 M   \n",
            "143 | model.encoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "144 | model.encoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "145 | model.encoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "146 | model.encoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "147 | model.encoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "148 | model.encoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "149 | model.encoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "150 | model.encoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \n",
            "151 | model.encoder.block.8.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "152 | model.encoder.block.8.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "153 | model.encoder.block.8.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "154 | model.encoder.block.8.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "155 | model.encoder.block.8.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "156 | model.encoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "157 | model.encoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \n",
            "158 | model.encoder.block.9                                                 | T5Block                    | 7 M   \n",
            "159 | model.encoder.block.9.layer                                           | ModuleList                 | 7 M   \n",
            "160 | model.encoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "161 | model.encoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "162 | model.encoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "163 | model.encoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "164 | model.encoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "165 | model.encoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "166 | model.encoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "167 | model.encoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \n",
            "168 | model.encoder.block.9.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "169 | model.encoder.block.9.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "170 | model.encoder.block.9.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "171 | model.encoder.block.9.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "172 | model.encoder.block.9.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "173 | model.encoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "174 | model.encoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \n",
            "175 | model.encoder.block.10                                                | T5Block                    | 7 M   \n",
            "176 | model.encoder.block.10.layer                                          | ModuleList                 | 7 M   \n",
            "177 | model.encoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "178 | model.encoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "179 | model.encoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "180 | model.encoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "181 | model.encoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "182 | model.encoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "183 | model.encoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "184 | model.encoder.block.10.layer.0.dropout                                | Dropout                    | 0     \n",
            "185 | model.encoder.block.10.layer.1                                        | T5LayerFF                  | 4 M   \n",
            "186 | model.encoder.block.10.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "187 | model.encoder.block.10.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "188 | model.encoder.block.10.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "189 | model.encoder.block.10.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "190 | model.encoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "191 | model.encoder.block.10.layer.1.dropout                                | Dropout                    | 0     \n",
            "192 | model.encoder.block.11                                                | T5Block                    | 7 M   \n",
            "193 | model.encoder.block.11.layer                                          | ModuleList                 | 7 M   \n",
            "194 | model.encoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "195 | model.encoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "196 | model.encoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "197 | model.encoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "198 | model.encoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "199 | model.encoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "200 | model.encoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "201 | model.encoder.block.11.layer.0.dropout                                | Dropout                    | 0     \n",
            "202 | model.encoder.block.11.layer.1                                        | T5LayerFF                  | 4 M   \n",
            "203 | model.encoder.block.11.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "204 | model.encoder.block.11.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "205 | model.encoder.block.11.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "206 | model.encoder.block.11.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "207 | model.encoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "208 | model.encoder.block.11.layer.1.dropout                                | Dropout                    | 0     \n",
            "209 | model.encoder.final_layer_norm                                        | T5LayerNorm                | 768   \n",
            "210 | model.encoder.dropout                                                 | Dropout                    | 0     \n",
            "211 | model.decoder                                                         | T5Stack                    | 137 M \n",
            "212 | model.decoder.block                                                   | ModuleList                 | 113 M \n",
            "213 | model.decoder.block.0                                                 | T5Block                    | 9 M   \n",
            "214 | model.decoder.block.0.layer                                           | ModuleList                 | 9 M   \n",
            "215 | model.decoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "216 | model.decoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "217 | model.decoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "218 | model.decoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "219 | model.decoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "220 | model.decoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "221 | model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \n",
            "222 | model.decoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "223 | model.decoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \n",
            "224 | model.decoder.block.0.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "225 | model.decoder.block.0.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "226 | model.decoder.block.0.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "227 | model.decoder.block.0.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "228 | model.decoder.block.0.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "229 | model.decoder.block.0.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "230 | model.decoder.block.0.layer.1.EncDecAttention.relative_attention_bias | Embedding                  | 384   \n",
            "231 | model.decoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "232 | model.decoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \n",
            "233 | model.decoder.block.0.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "234 | model.decoder.block.0.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "235 | model.decoder.block.0.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "236 | model.decoder.block.0.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "237 | model.decoder.block.0.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "238 | model.decoder.block.0.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "239 | model.decoder.block.0.layer.2.dropout                                 | Dropout                    | 0     \n",
            "240 | model.decoder.block.1                                                 | T5Block                    | 9 M   \n",
            "241 | model.decoder.block.1.layer                                           | ModuleList                 | 9 M   \n",
            "242 | model.decoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "243 | model.decoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "244 | model.decoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "245 | model.decoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "246 | model.decoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "247 | model.decoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "248 | model.decoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "249 | model.decoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \n",
            "250 | model.decoder.block.1.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "251 | model.decoder.block.1.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "252 | model.decoder.block.1.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "253 | model.decoder.block.1.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "254 | model.decoder.block.1.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "255 | model.decoder.block.1.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "256 | model.decoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "257 | model.decoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \n",
            "258 | model.decoder.block.1.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "259 | model.decoder.block.1.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "260 | model.decoder.block.1.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "261 | model.decoder.block.1.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "262 | model.decoder.block.1.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "263 | model.decoder.block.1.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "264 | model.decoder.block.1.layer.2.dropout                                 | Dropout                    | 0     \n",
            "265 | model.decoder.block.2                                                 | T5Block                    | 9 M   \n",
            "266 | model.decoder.block.2.layer                                           | ModuleList                 | 9 M   \n",
            "267 | model.decoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "268 | model.decoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "269 | model.decoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "270 | model.decoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "271 | model.decoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "272 | model.decoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "273 | model.decoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "274 | model.decoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \n",
            "275 | model.decoder.block.2.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "276 | model.decoder.block.2.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "277 | model.decoder.block.2.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "278 | model.decoder.block.2.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "279 | model.decoder.block.2.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "280 | model.decoder.block.2.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "281 | model.decoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "282 | model.decoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \n",
            "283 | model.decoder.block.2.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "284 | model.decoder.block.2.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "285 | model.decoder.block.2.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "286 | model.decoder.block.2.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "287 | model.decoder.block.2.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "288 | model.decoder.block.2.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "289 | model.decoder.block.2.layer.2.dropout                                 | Dropout                    | 0     \n",
            "290 | model.decoder.block.3                                                 | T5Block                    | 9 M   \n",
            "291 | model.decoder.block.3.layer                                           | ModuleList                 | 9 M   \n",
            "292 | model.decoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "293 | model.decoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "294 | model.decoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "295 | model.decoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "296 | model.decoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "297 | model.decoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "298 | model.decoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "299 | model.decoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \n",
            "300 | model.decoder.block.3.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "301 | model.decoder.block.3.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "302 | model.decoder.block.3.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "303 | model.decoder.block.3.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "304 | model.decoder.block.3.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "305 | model.decoder.block.3.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "306 | model.decoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "307 | model.decoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \n",
            "308 | model.decoder.block.3.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "309 | model.decoder.block.3.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "310 | model.decoder.block.3.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "311 | model.decoder.block.3.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "312 | model.decoder.block.3.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "313 | model.decoder.block.3.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "314 | model.decoder.block.3.layer.2.dropout                                 | Dropout                    | 0     \n",
            "315 | model.decoder.block.4                                                 | T5Block                    | 9 M   \n",
            "316 | model.decoder.block.4.layer                                           | ModuleList                 | 9 M   \n",
            "317 | model.decoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "318 | model.decoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "319 | model.decoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "320 | model.decoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "321 | model.decoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "322 | model.decoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "323 | model.decoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "324 | model.decoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \n",
            "325 | model.decoder.block.4.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "326 | model.decoder.block.4.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "327 | model.decoder.block.4.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "328 | model.decoder.block.4.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "329 | model.decoder.block.4.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "330 | model.decoder.block.4.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "331 | model.decoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "332 | model.decoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \n",
            "333 | model.decoder.block.4.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "334 | model.decoder.block.4.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "335 | model.decoder.block.4.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "336 | model.decoder.block.4.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "337 | model.decoder.block.4.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "338 | model.decoder.block.4.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "339 | model.decoder.block.4.layer.2.dropout                                 | Dropout                    | 0     \n",
            "340 | model.decoder.block.5                                                 | T5Block                    | 9 M   \n",
            "341 | model.decoder.block.5.layer                                           | ModuleList                 | 9 M   \n",
            "342 | model.decoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "343 | model.decoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "344 | model.decoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "345 | model.decoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "346 | model.decoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "347 | model.decoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "348 | model.decoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "349 | model.decoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \n",
            "350 | model.decoder.block.5.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "351 | model.decoder.block.5.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "352 | model.decoder.block.5.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "353 | model.decoder.block.5.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "354 | model.decoder.block.5.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "355 | model.decoder.block.5.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "356 | model.decoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "357 | model.decoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \n",
            "358 | model.decoder.block.5.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "359 | model.decoder.block.5.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "360 | model.decoder.block.5.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "361 | model.decoder.block.5.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "362 | model.decoder.block.5.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "363 | model.decoder.block.5.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "364 | model.decoder.block.5.layer.2.dropout                                 | Dropout                    | 0     \n",
            "365 | model.decoder.block.6                                                 | T5Block                    | 9 M   \n",
            "366 | model.decoder.block.6.layer                                           | ModuleList                 | 9 M   \n",
            "367 | model.decoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "368 | model.decoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "369 | model.decoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "370 | model.decoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "371 | model.decoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "372 | model.decoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "373 | model.decoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "374 | model.decoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \n",
            "375 | model.decoder.block.6.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "376 | model.decoder.block.6.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "377 | model.decoder.block.6.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "378 | model.decoder.block.6.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "379 | model.decoder.block.6.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "380 | model.decoder.block.6.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "381 | model.decoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "382 | model.decoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \n",
            "383 | model.decoder.block.6.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "384 | model.decoder.block.6.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "385 | model.decoder.block.6.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "386 | model.decoder.block.6.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "387 | model.decoder.block.6.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "388 | model.decoder.block.6.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "389 | model.decoder.block.6.layer.2.dropout                                 | Dropout                    | 0     \n",
            "390 | model.decoder.block.7                                                 | T5Block                    | 9 M   \n",
            "391 | model.decoder.block.7.layer                                           | ModuleList                 | 9 M   \n",
            "392 | model.decoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "393 | model.decoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "394 | model.decoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "395 | model.decoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "396 | model.decoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "397 | model.decoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "398 | model.decoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "399 | model.decoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \n",
            "400 | model.decoder.block.7.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "401 | model.decoder.block.7.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "402 | model.decoder.block.7.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "403 | model.decoder.block.7.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "404 | model.decoder.block.7.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "405 | model.decoder.block.7.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "406 | model.decoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "407 | model.decoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \n",
            "408 | model.decoder.block.7.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "409 | model.decoder.block.7.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "410 | model.decoder.block.7.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "411 | model.decoder.block.7.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "412 | model.decoder.block.7.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "413 | model.decoder.block.7.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "414 | model.decoder.block.7.layer.2.dropout                                 | Dropout                    | 0     \n",
            "415 | model.decoder.block.8                                                 | T5Block                    | 9 M   \n",
            "416 | model.decoder.block.8.layer                                           | ModuleList                 | 9 M   \n",
            "417 | model.decoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "418 | model.decoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "419 | model.decoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "420 | model.decoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "421 | model.decoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "422 | model.decoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "423 | model.decoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "424 | model.decoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \n",
            "425 | model.decoder.block.8.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "426 | model.decoder.block.8.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "427 | model.decoder.block.8.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "428 | model.decoder.block.8.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "429 | model.decoder.block.8.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "430 | model.decoder.block.8.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "431 | model.decoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "432 | model.decoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \n",
            "433 | model.decoder.block.8.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "434 | model.decoder.block.8.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "435 | model.decoder.block.8.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "436 | model.decoder.block.8.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "437 | model.decoder.block.8.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "438 | model.decoder.block.8.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "439 | model.decoder.block.8.layer.2.dropout                                 | Dropout                    | 0     \n",
            "440 | model.decoder.block.9                                                 | T5Block                    | 9 M   \n",
            "441 | model.decoder.block.9.layer                                           | ModuleList                 | 9 M   \n",
            "442 | model.decoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "443 | model.decoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "444 | model.decoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "445 | model.decoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "446 | model.decoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "447 | model.decoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "448 | model.decoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "449 | model.decoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \n",
            "450 | model.decoder.block.9.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "451 | model.decoder.block.9.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "452 | model.decoder.block.9.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "453 | model.decoder.block.9.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "454 | model.decoder.block.9.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "455 | model.decoder.block.9.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "456 | model.decoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "457 | model.decoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \n",
            "458 | model.decoder.block.9.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "459 | model.decoder.block.9.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "460 | model.decoder.block.9.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "461 | model.decoder.block.9.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "462 | model.decoder.block.9.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "463 | model.decoder.block.9.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "464 | model.decoder.block.9.layer.2.dropout                                 | Dropout                    | 0     \n",
            "465 | model.decoder.block.10                                                | T5Block                    | 9 M   \n",
            "466 | model.decoder.block.10.layer                                          | ModuleList                 | 9 M   \n",
            "467 | model.decoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "468 | model.decoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "469 | model.decoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "470 | model.decoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "471 | model.decoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "472 | model.decoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "473 | model.decoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "474 | model.decoder.block.10.layer.0.dropout                                | Dropout                    | 0     \n",
            "475 | model.decoder.block.10.layer.1                                        | T5LayerCrossAttention      | 2 M   \n",
            "476 | model.decoder.block.10.layer.1.EncDecAttention                        | T5Attention                | 2 M   \n",
            "477 | model.decoder.block.10.layer.1.EncDecAttention.q                      | Linear                     | 589 K \n",
            "478 | model.decoder.block.10.layer.1.EncDecAttention.k                      | Linear                     | 589 K \n",
            "479 | model.decoder.block.10.layer.1.EncDecAttention.v                      | Linear                     | 589 K \n",
            "480 | model.decoder.block.10.layer.1.EncDecAttention.o                      | Linear                     | 589 K \n",
            "481 | model.decoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "482 | model.decoder.block.10.layer.1.dropout                                | Dropout                    | 0     \n",
            "483 | model.decoder.block.10.layer.2                                        | T5LayerFF                  | 4 M   \n",
            "484 | model.decoder.block.10.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "485 | model.decoder.block.10.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "486 | model.decoder.block.10.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "487 | model.decoder.block.10.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "488 | model.decoder.block.10.layer.2.layer_norm                             | T5LayerNorm                | 768   \n",
            "489 | model.decoder.block.10.layer.2.dropout                                | Dropout                    | 0     \n",
            "490 | model.decoder.block.11                                                | T5Block                    | 9 M   \n",
            "491 | model.decoder.block.11.layer                                          | ModuleList                 | 9 M   \n",
            "492 | model.decoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "493 | model.decoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "494 | model.decoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "495 | model.decoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "496 | model.decoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "497 | model.decoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "498 | model.decoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "499 | model.decoder.block.11.layer.0.dropout                                | Dropout                    | 0     \n",
            "500 | model.decoder.block.11.layer.1                                        | T5LayerCrossAttention      | 2 M   \n",
            "501 | model.decoder.block.11.layer.1.EncDecAttention                        | T5Attention                | 2 M   \n",
            "502 | model.decoder.block.11.layer.1.EncDecAttention.q                      | Linear                     | 589 K \n",
            "503 | model.decoder.block.11.layer.1.EncDecAttention.k                      | Linear                     | 589 K \n",
            "504 | model.decoder.block.11.layer.1.EncDecAttention.v                      | Linear                     | 589 K \n",
            "505 | model.decoder.block.11.layer.1.EncDecAttention.o                      | Linear                     | 589 K \n",
            "506 | model.decoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "507 | model.decoder.block.11.layer.1.dropout                                | Dropout                    | 0     \n",
            "508 | model.decoder.block.11.layer.2                                        | T5LayerFF                  | 4 M   \n",
            "509 | model.decoder.block.11.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "510 | model.decoder.block.11.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "511 | model.decoder.block.11.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "512 | model.decoder.block.11.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "513 | model.decoder.block.11.layer.2.layer_norm                             | T5LayerNorm                | 768   \n",
            "514 | model.decoder.block.11.layer.2.dropout                                | Dropout                    | 0     \n",
            "515 | model.decoder.final_layer_norm                                        | T5LayerNorm                | 768   \n",
            "516 | model.decoder.dropout                                                 | Dropout                    | 0     \n",
            "517 | model.lm_head                                                         | Linear                     | 24 M  \n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "915a0b65612243668570c555a47a6c37",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\r"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "3ec26f803d124dd0877e1ce0e3517f68",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "/pytorch/torch/csrc/utils/python_arg_parser.cpp:756: UserWarning: This overload of add_ is deprecated:\n",
            "\tadd_(Number alpha, Tensor other)\n",
            "Consider using one of the following signatures instead:\n",
            "\tadd_(Tensor other, *, Number alpha)\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "dbe7a4854b8f420faaea8de4583fb1f0",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:***** Validation results *****\n",
            "INFO:__main__:avg_val_loss = tensor(0.0839, device='cuda:0')\n",
            "\n",
            "INFO:__main__:loss = tensor(0.0199, device='cuda:0')\n",
            "\n",
            "INFO:__main__:train_loss = tensor(0.0199, device='cuda:0')\n",
            "\n",
            "INFO:__main__:val_loss = tensor(0.0839, device='cuda:0')\n",
            "\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "f40c9bf16c9a473ba758a6439dce2652",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:***** Validation results *****\n",
            "INFO:__main__:avg_train_loss = tensor(0.2954, device='cuda:0')\n",
            "\n",
            "INFO:__main__:avg_val_loss = tensor(0.0874, device='cuda:0')\n",
            "\n",
            "INFO:__main__:epoch = 0\n",
            "\n",
            "INFO:__main__:loss = tensor(0.0066, device='cuda:0')\n",
            "\n",
            "INFO:__main__:train_loss = tensor(0.0066, device='cuda:0')\n",
            "\n",
            "INFO:__main__:val_loss = tensor(0.0874, device='cuda:0')\n",
            "\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "1"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 30
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "l-obOz6v70iB",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# -p makes the cell re-runnable: mkdir no longer fails if the directory already exists.\n",
        "!mkdir -p t5_base_imdb_sentiment"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OQBJcrrWi2vC",
        "colab_type": "code",
        "outputId": "a98adf77-6e23-4304-8ccc-5b13a33a2a32",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 50
        }
      },
      "source": [
        "## save the model this way so next time you can load it using T5ForConditionalGeneration.from_pretrained\n",
        "model.model.save_pretrained('t5_base_imdb_sentiment')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.configuration_utils:Configuration saved in t5_base_imdb_sentiment/config.json\n",
            "INFO:transformers.modeling_utils:Model weights saved in t5_base_imdb_sentiment/pytorch_model.bin\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XhjELPOk7-cz",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# !cp -r t5_base_imdb_sentiment drive/My\\ Drive/"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "brPOSAkjNP5t",
        "colab_type": "text"
      },
      "source": [
        "### Eval"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "_7SuVh05lDrJ",
        "colab_type": "text"
      },
      "source": [
        "For inference we will use the `generate` method with greedy decoding and a maximum length of 2."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "25jbT49CVoXN",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import textwrap\n",
        "from tqdm.auto import tqdm\n",
        "from sklearn import metrics"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cyriGR20lSRa",
        "colab_type": "text"
      },
      "source": [
        "Let's visualize a few predictions on the test dataset."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "wwJ998sMz2Ci",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Build the test split (sequences capped at max_len=512) and shuffle it so that\n",
        "# the batch inspected below is a random sample of reviews.\n",
        "dataset = ImdbDataset(\n",
        "    tokenizer,\n",
        "    'aclImdb',\n",
        "    'test',\n",
        "    max_len=512,\n",
        ")\n",
        "loader = DataLoader(\n",
        "    dataset,\n",
        "    batch_size=32,\n",
        "    shuffle=True,\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2LQtN5b90TyW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "it = iter(loader)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TRD03teH0YMe",
        "colab_type": "code",
        "outputId": "d43041e6-5d7d-49d5-e91a-7530c5d1d6b1",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "batch = next(it)\n",
        "batch[\"source_ids\"].shape"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "torch.Size([32, 512])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 36
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "eewDktozk7GN",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# generate() does not switch the module out of training mode, so call eval() first\n",
        "# to disable dropout and keep the greedy predictions deterministic.\n",
        "model.model.eval()\n",
        "\n",
        "outs = model.model.generate(input_ids=batch['source_ids'].cuda(),\n",
        "                            attention_mask=batch['source_mask'].cuda(),\n",
        "                            max_length=2)\n",
        "\n",
        "# Decode the generated label tokens, the input reviews and the reference labels back to text.\n",
        "dec = [tokenizer.decode(ids) for ids in outs]\n",
        "\n",
        "texts = [tokenizer.decode(ids) for ids in batch['source_ids']]\n",
        "targets = [tokenizer.decode(ids) for ids in batch['target_ids']]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9vBe0UNw7cHY",
        "colab_type": "code",
        "outputId": "2f0171ac-8d7d-41db-db31-d57bf72bc205",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "# Iterate over the decoded batch itself instead of a hard-coded 32, so the cell still\n",
        "# works when the batch size changes or the batch is smaller than expected.\n",
        "for review, actual, predicted in zip(texts, targets, dec):\n",
        "    # Wrap the review at 100 columns to keep the printed output readable.\n",
        "    lines = textwrap.wrap(\"Review:\\n%s\\n\" % review, width=100)\n",
        "    print(\"\\n\".join(lines))\n",
        "    print(\"\\nActual sentiment: %s\" % actual)\n",
        "    print(\"Predicted sentiment: %s\" % predicted)\n",
        "    print(\"=====================================================================\\n\")"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Review: I dont know where to begin Perhaps the whole idea of this movie was just a disaster waiting\n",
            "to happen There is nothing slightly humorous about a kidnapping I dont know what was more\n",
            "offensivethe subject matter or David Arquettes performance It was like watching a bull get its penis\n",
            "cut off although I think the bull felt better afterwards The filmmakers should find something about\n",
            "Sinatra other than his sons kidnapping to show like I dont know his TALENT AS A SINGER His family\n",
            "shouldnt have to relive that horror Thank GOD it was just shown on HBO and not released in theaters\n",
            "Please dont watch this if you have any self respect\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: A fine performance by Vittorio Mezzogiorno and a masterful one by JeanHugues Anglade adorn\n",
            "this stange tale of lust desire and alienation in France The work of the two lead performers is\n",
            "strikingsubtle intense and passionate Alas the script is deliberately turgid and sordid and the\n",
            "overall effect leaves one with a downcast spirit Still those who can appreciate fine quality acting\n",
            "will be able to savor the courageous work of the leads in this often difficult film journey of\n",
            "Gallic low life\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: I almost stopped watching Hindi movies because of the mediocre quality and story lines One\n",
            "exception for this is Ramgopal Verma movies This is a nice movie with great performances from the\n",
            "star cast This is must see movie for those who are sick of watching stupid dancing and love stories\n",
            "The adaptation of the story and characterization was exceptional goodYou should watch this movie for\n",
            "Nana Patekar based on the life of Mumbai cop Daya Naik this movie deals in a more realistic way The\n",
            "film delves into the life of the common man which he has apart from being an encounter specialist I\n",
            "rate this as one of the best movie of the year\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: This was a really interesting Halloween film I wasnt to thrilled with the whole Thorn theory\n",
            "but it still makes for a good film I liked getting to see Tommy Doyle back but sadly Donald\n",
            "Pleasance died right after shooting The film had a really REALLY bad director who didnt give a flip\n",
            "about the series from what I heard treated Donald bad and wouldnt let Danielle Harris come back as\n",
            "Jamie Its like he was just trying to bring down the film but I still liked it There were alot of\n",
            "cuts and music changes and if youre lucky you can get the Producers Cut which features over 40 min\n",
            "of never before scenes With those scenes it turns into a whole new movie Check it out if you have\n",
            "the chance\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: Cheerleader Massacre was supposed to be the fourth installment of the Slumber Party Massacre\n",
            "series if thats what they were doing which it is considering ONE actress from the original returns\n",
            "in a small cameo role they have failed miserably and made by far the worst installment of the\n",
            "quadrilogy Cheerleader Massacre seamlessly combines bad acting a horrible plot a dumb killer dull\n",
            "and boring deaths boring scenery and hideous camera work to make it one of the worst films ever made\n",
            "Did I already mention how bad it was Dont get me wrong this cheesy and retarded excuse for a horror\n",
            "film is nowhere near as bad as Napoleon Dynamite but it is undeniably a horrible movieCheerleader\n",
            "Massacre is an exact polar opposite of the original Slumber Party Massacre Stay away by all means\n",
            "This movie is utter garbage\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: This movie will give me nightmares I will wake up drenched in sweat screaming I didnt make\n",
            "this film please dont blame me I honestly think it would have been more entertaining to watch a fat\n",
            "guy eating lard in his moms basement for a hour or two than to watch this crap I understand money\n",
            "was tight but goddamn what the hell were they thinking there was no thought plot or effort put into\n",
            "this This movie needs a warning Please for the love of god dont fund the drama department a the\n",
            "local JC On an other note these are the least likable characters I have ever seen and I have seen\n",
            "movies with Hitler in them So lastly take my advice the next time you even think about renting this\n",
            "just pop a few hundred Adivl and let the sleep come\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: In this movie Virtual Sexuality the 17 year old Justine is not lucky in love One day when\n",
            "she is stood up she goes with her friend to a virual reality conference there she is introduced with\n",
            "a machine that can change your look dody and whatever you like in Virtual Reality She decides to try\n",
            "it out but begins to make a boyfriend of her own her dreamdate Then suddenly there is an explosion\n",
            "in a gas pipe and her creation comes to life Ill say no more youll have to watch the movie which is\n",
            "quite fun to watch\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: Why does C Thomas Howell do these movies Cruise Howells one time costar does a huge\n",
            "blockbuster of WOTW and Howell follows with this lame effortWhere do I start here Production Values\n",
            "Ill start with the good stuff The look and feel of some of the scenes in this movie are not too bad\n",
            "to be honest The setups are okay in spots and the direction not too badScript Terrible A series of\n",
            "clunky scenes that could have been put in any order you like permeate throughout the movie The\n",
            "amount of times the scene faded to black and reemerged a second later in the same room was\n",
            "uncountable Very poor storyline but so was the Cruise WOTW takes some blame but an abysmal\n",
            "screenplay kills it offSpecial FX Okay I dont want to be too harsh here as I imagine the budget was\n",
            "smaller than Cruises lunch bill but in the overall context of the film the effects are badly done\n",
            "Some shots are quite impressive mainly far off destruction shots of bridges Washington liner But in\n",
            "the main the alien machines and tentacles themselves are dreadful Also the camera quality is fuzzy\n",
            "on some shots and cuts away entirely on othersActing Im a fan of Howell but as he has reduced\n",
            "himself to acting in these lowbudget flicks he has succumbed to the overacting bug a long time ago\n",
            "Look at his performance in The Hitcher and compare it to this movie There is no comparison He\n",
            "overdoes his facial expressions his flailing arms and legs where did he get that running style and\n",
            "for a final coupdegras look at the scene where he loses the photo of his family Hysterical But after\n",
            "saying all that he is still the best actor on show here Busey is embarrassing to look at and Peter\n",
            "Green Zed is truly dead now baby mumbles incoherently through his one and only scene I honestly\n",
            "could not understand one word he said I even went so far as trying to enable the subtitles on that\n",
            "scene but the DVD did not have subtitles This seems to be a real keepitinthefamily affair too as\n",
            "Howells son the directors wife and the line producer all make it into the film None of them are\n",
            "goodDirection not bad but not good eitherScore DismalOverall a lame duck effort that will don'thing\n",
            "for Howell in his\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: Another Asian horror movie packed with intense and creepy moments Another Asian horror\n",
            "trademark is the complexity of the plot which is here as well MAJOR SPOILER WARNINGThe movie starts\n",
            "pretty simple two sisters go to live with their dad and stepmother after being put in a mental\n",
            "institution after their mother hanged herself The sisters seem very hostile towards their mother\n",
            "especially the elder one and they seem to ignore their father All goes smoothly until the mother\n",
            "locks the young sister in the wardrobe and the elder sister tells her father Then it hits you your\n",
            "sister has been dead for years now It turns out the older sister is still not recovered from the\n",
            "death of her mother and what we didnt know is that the wardrobe the mother was hanged in fell on the\n",
            "younger sister and killed her as wellAs for the stepmother she is the alter ego of the older sister\n",
            "revealed when the stepmother actually the sisters alter ego is sitting on a couch when the real\n",
            "stepmother walks in I hope it has been made clearer for confused Asian horror fans out thereFinally\n",
            "my favourite scene is the scene where the father invites friends over for dinner and one of the\n",
            "friends starts to choke which erupts into a panic attack Very creepy 7 out of 10\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: As a native of New Orleans I can state that almost everything in this movie from the\n",
            "atrocious NAwlins dialect to the highly creative manipulation of Crescent City geography is horrible\n",
            "This is another one of those Big Hollywood movies that decides to stereotype New Orleans as 1 A city\n",
            "full of Frenchsounding idiots 2 A city full of people who sound as if theyve just returned from\n",
            "Blanche Dubois summer home 3 A city of drunkards where every day is Mardi Gras 4 A city of deep\n",
            "mystery where almost everyone practices or is a victim of voodoo I admit that maybe we are a city of\n",
            "drunkards although every day is NOT Mardi Gras The Big Easy is one of the worst films about New\n",
            "Orleans I wouldnt recommend it to anybody\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: This selfimportant confusing b+w film watches like an infant on a very bad acid trip Youre\n",
            "dealing with something that reminds you of a piece of rotting lettuce that accidentally fell out of\n",
            "the back of a garbage truck no one cares to touch it because it will probably be washed away on its\n",
            "own down the storm drain Theres no room for plot when youve got visceral imagery and subtle allegory\n",
            "To me it seems like the director tries to make the next great art movie while begging for\n",
            "intellectual accolades I didnt bring my beret either Watching this I felt almost insulted since the\n",
            "film does such an effective job of distancing itself from you\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: Mercy the movie actually starts out as a somewhat decent film and ellen barkin does give a\n",
            "strong performance But if you have read the book and actually got to know the characters and cared\n",
            "who done it the movie just does not compare It is always hard to brink a book onto film and\n",
            "unfortunatley this one ends up failing 3 out of 10\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: What in the world This piece of gambling cinema would have been suitable for the Lifetime\n",
            "Network Michael Imperoli is a good actor but I think his portrayal as Stu fell short The montages\n",
            "were unbearable and too many The supporting cast where are you Whoever did the casting should be\n",
            "partially at fault The cinematography was useless A gambling story with an after school feel to it\n",
            "Stories of this sort should be left for the Oliver Stones of the world It would still suck ass but\n",
            "at least it would be fun to watch It was an attempt that lost its wheels before the race ever begun\n",
            "Mario Andretti in the 1982 Indy 500 came to mind\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: This movie definitely made me laugh but that doesnt mean it was exactly funny Well then\n",
            "again me and my friends had a lot of fun watching itI doubt there is anything about this movie that\n",
            "hasnt been done at least twice before just like the plot itself All of the characters are overused\n",
            "movie cliché cardboardbox roles that dont even require acting skills accordingly such skills are not\n",
            "delivered We have the corrupt cop a ruthless killer who claims to care about his men and their\n",
            "families whilst caring nothing about people he shoots in the forehead at so close a range as to have\n",
            "blood spat on his face We have the wornout cop on the edge so nicely pointed at in the discussion\n",
            "boards of this movie we have the old onedayawayfromretirementcop who just about everyone must have\n",
            "immediately identified as the most likely man on the inside since he had most to gain and he didnt\n",
            "utter a trustworthy word throughout the movie About as seethrough as a glass house on a sunny day\n",
            "The big black gangster king was a copy of all previous big black gangster kings in movie history\n",
            "they couldve just called him Marcellus Wallace but just slightly tougher and more ruthless because\n",
            "something has to emphasize that we also know Laurence Fishburne from actually good movies Then we\n",
            "finally have the HIGHLY EDUCATED doctor who cant think of anything reasonable to do as soon as the\n",
            "situation differs from her ordinary life and who spends the majority of the movie sitting in a\n",
            "corner helplessly trying to figure out how to hold on to the weapon she was given NOT USING ITThe\n",
            "whole siege story is not interesting not original having been used twice before and this movie\n",
            "manages to add absolutely nothing interesting to it There is the initial probe then the laying of\n",
            "the siege then the assault then the escape attempts Meanwhile a bunch of strained stressed freaked\n",
            "out cops and thugs manage to hold off a Police assault team with hightech equipment and the quite\n",
            "important advantage of VISION Then again in deep night with the power cut and with a snow storm\n",
            "raging overhead there is definitely a lot of light coming in so who really cares about night\n",
            "visionBut the best part comes right at the end In the first scenes showing Precinct 13 we see it is\n",
            "situated in an outskirt of an industrial city factories and office buildings surround it on all\n",
            "sides From this point\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: Some people say this show was good in its early years I disagree with all of em The show is\n",
            "just plain stupid and pathetic My mum hates it I hate it my dad hates it I dont know about my sister\n",
            "but oh well Here some reasons why1 THE CHARACTERS Babies being used as grown up style characters are\n",
            "stupid The babies are just precocious and annoying The grown ups and adults are dumb and unappealing\n",
            "The worst character is that Angelica Pickles she really does it in for your ear drums when you had a\n",
            "long hard and miserable day at the office and also that Kimi Finster who appears later on she is too\n",
            "over optimistic and a pain in the butt She cant decided whither she is French or Japanese it doesnt\n",
            "matter know you are a American Citizen know and thats that Oh what am I talking about all the\n",
            "characters from this show suck2 THE STORIES The stories are unoriginal and dumb The make it like the\n",
            "babies go off on a great adventure yeah to the back yard shed In one episode that little goofy brat\n",
            "Tommy Pickles the Leader broke in to a televisions control room and literally almost destroyed it\n",
            "Dont give kids any idea to smash up normal TV Stations control rooms they pay a awful lot of money\n",
            "for them in real life I can imagine what the broadcasters must of felt like airing this episode they\n",
            "will probably start staring at their machines throughout the day scared that a baby will brake in\n",
            "Sad3 OVER RATED The show has been dragging on for years now and people are still making up stories\n",
            "and new series and spinoffs for this Get off The Simpsons have been going for nearly the same amount\n",
            "of time as this but they are much better and funnier than babies The show is just plain over rated\n",
            "People where is your common senseAnyway I surprised TV Stations across the world want to air this\n",
            "series even off today The show is utter junk and should have never been produced The two movies for\n",
            "this cartoons sucked just the same 210\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: What can I saynot much to this one at all Pretty dull and uninterestingThe actors\n",
            "performances are just OK The only one that shines in any way is Simmons but he only has maybe 3\n",
            "scenes I understand that by keeping his screen time to a minimum he retains the mysterious psychic\n",
            "aura he has but I cant help but feel his talent was wasted No one else rose above mediocreThe story\n",
            "itself seems like it may be intriguing at the beginning but then just doesnt go anywhere There wasnt\n",
            "a single scene in the movie that impressed me or made me feel like I had just seen something special\n",
            "The cinematography was fairly blandI mean desert in a washed out sort of sepianot very inspiringThe\n",
            "story of his childhood pal back outta prison seemed only partially thought out and didnt really add\n",
            "anything to the story other than making an average Twilight Zone script into a full length\n",
            "featureDrab\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: I kept watching it because it seemed like the plot was going somewhere When it ambiguously\n",
            "got there I was very disappointed Im going to tell you what really happened in the next sentence But\n",
            "maybe I wont Maybe Ill just imply something will happen The writers lacked any imagination This is\n",
            "not even a B movie its a made for TV B movie\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: Straight up I love this film I love everything about it It has a great soundtrack it has a\n",
            "lot of recognizable faces and it is funny as hell There are so many plots in this film and every one\n",
            "of them is funny in one way or anotherWhere as Spicolli lit up the screen two years back Drake is\n",
            "almost as memorable of a character All he wants to do is have fun He moves out of the house without\n",
            "his parents consent he skips work whenever he feels like it he is obsessed with sex he loves his\n",
            "drugs and booze and he tries to be a good friend It is his lacksidaisical attitude that makes him\n",
            "such a joy to watch And he comes out with some great lines And there are so many tiny observations\n",
            "that you dont see coming but they make you laugh at the sheer velocity when it hits you One\n",
            "particular moment is when Tommy and Bill are talking about Bills ex girlfriend dating someone else\n",
            "now At the end of the conversation Tommy takes his huge beer bottle and just throws it over his\n",
            "shoulder casually He then says good night and the scene ends It is a perfect scene Tommys world is\n",
            "his own He really lives to party and have fun When the conversation is over his time is over and he\n",
            "doesnt care who he offends in the process He has an innocence about him Its casual is his favourite\n",
            "sayingAnother such classic scene is Reggie handing Bill a donut He says something to him that me and\n",
            "my friends will never forget because we rewound the film ten times and watched that part over and\n",
            "over again and hurt ourselves laughing It has to be seen to be appreciatedWild Life is a throw back\n",
            "to when teen comedies were funny raunchy had a good ear entertained us and just wanted us to get\n",
            "lost in their world for 90 minutes Wild Life does all those things perfectly If this is a film that\n",
            "you havent seen give it a chance It is a classicAlso check out the army store guy that Jim has\n",
            "problems with He is a very familiar face now and it is his first role on the big screen\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: This movie is a real shame not just for the plotthe empty performance of the characters it\n",
            "is for the lack of creativity from the director and all the crew this is maybe one of the worst\n",
            "movies of all timesand it is hard to believe that is the sequel of one of the most famous movies of\n",
            "the 90sI am a great fan of The Mask when I went to see this movie I was expecting to a movie with a\n",
            "good sense of humor a movie with a acceptable plot instead I saw a really bad copy of Chuck Jones\n",
            "and Tex Avery cartoons the movie was not funny even for my 7 years old sister so I wonderWhat was\n",
            "wrong New Line CinemaWas it trying to repeat the success of the first movie or was it trying to\n",
            "create another masterpiece like The Lord of the RingsBecause if they did they were completely out of\n",
            "their minds\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: eXistenZ is simply David Cronenbergs best movie All the people compare it to the Matrix\n",
            "Theyre not even similar If you enjoyed Cronenbergs other works just a little bit youll love this one\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: First time I ever felt I needed to write a reviewI have seen thousands of movies in my life\n",
            "and I like a wide range of movies I am reasonably opened minded and can easily say I enjoyed a movie\n",
            "while still saying yeah it was not good but I enjoyed it I can appreciate the mastery of great\n",
            "movies like The Shaw shank redemption the godfather and American history X I can like good movies in\n",
            "a genre like horror or comedy even if the movie might not be that great I can even enjoy a bad movie\n",
            "that just happens to entertain me Bloodsport I also will try to rate movie fairly even if I did not\n",
            "like it City lights by Charlie Chapin was not a movie I enjoyed but I can appreciate the acting and\n",
            "story lines for the timeI think some people when they go on this site instead of randomly click a\n",
            "rating should take a few ideas into account Try to rate the movie based on how good it actually was\n",
            "Do not let your personal bias affect the rating Also look at other moves you rated and compare the\n",
            "movie you are going to rateThis movie was the worst piece of trash I have ever seen 2 hours of my\n",
            "life where just stolen The acting was awful across the board The scenes where choppy at best However\n",
            "the real disgrace was the story The first 20 minutes we actually had a story that tried to make\n",
            "sense and take the viewer from point A to B However after that it was a nightmare They kept trying\n",
            "to add new elements but nothing was every explained Nothing really ever made sense was steward dead\n",
            "is he alive did he hit by lighting was it really lighting was it aliens is he an alien etc The\n",
            "ending tied nothing together and really did not answer any questions The only positive was nobody\n",
            "cared we where just happy to leave the theater6510 What is wrong with some of you I will admit that\n",
            "the 8 of us where so mad about seeing this we did think what would make it better and we decide to\n",
            "tell a few of our friends that this movie was good so they would have to suffer and see this movie\n",
            "What can I say misery loves company That is really the only reason I can see for a 65 ratingDo not\n",
            "waste your life\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: My main criticism with the movie is the animation I totally agree with everyone else it was\n",
            "very poor Some of the characters seemed to have darker skin tones than they did in the first film\n",
            "which is much better Also the background colours looked rushed and somewhat static It is also a\n",
            "shame that Michael JFox didnt voice Milo he did such a good job and James Arnold Taylor wasnt sure\n",
            "whether he was supposed to sound like Milo or Aladdin I have also taken into consideration the lack\n",
            "of a good storyline the third story was confusing and clumsily told and the second story suffered\n",
            "from poor scripting To make things worse the first one I cant even remember other than a fishing\n",
            "village being haunted or something like that However there was some nice music and good voice\n",
            "talents from John Mahoney Cree Summer Clancy Brown and Tom Wilson that saved the film from total\n",
            "disaster All in all a disappointing sequel to a surprisingly good film 410 Bethany Cox\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: Giant Robot was the most popular Japanese TV serial ever seen on Indian TV It was targeted\n",
            "to children and we saw a robot for the first time in our life Many Indian children must have even\n",
            "seen a machine for the first time outside the school textbooks The serial also showed a child in an\n",
            "adults organization fighting evil No doubt many of us who have seen Giant Robot in our childhood\n",
            "long for our own robots and as a stopgap arrangement look upon our computers in the same way This\n",
            "show also portrayed ideal adults referring at Jerry Johnnys buddy friend and Unicorn chief Azuma We\n",
            "grew to respect Japanese progress and still view Japan as the ideal Asian nationBTW at that time\n",
            "there were no satellite TV channels in India and the govt owned broadcaster did not show much of\n",
            "Disney cartoons I guess that was how child serials like giant Robot got appreciated Nowadays there\n",
            "is Pokemon etc but they are no so fascinating or alluring as Giant robot\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: This might sound weird but I only got to see the first movie The Emperors New Groove\n",
            "yaddayadda a week ago and only because of one episode of the TV show I simply adore Kuzcos character\n",
            "but Kronk isnt that bad either Anyway eventually I decided to watch the second film just so I\n",
            "wouldve seen it Hoped it would be as good as the first one but Im sorry to tell this but the more\n",
            "the humour got American the more I yawned I agreed with Kuzco when he started crying seeing all the\n",
            "cheesy footageStill younger kids and probably veterans too will love this movie to bits if they like\n",
            "the old school moralising Disney that is but I just had expectations that were an eensy teensy\n",
            "little bit hell of a lot higher than they shouldve been Kronk is a lovely character being good\n",
            "hearted and dumb all at once but it were Pacha and Kuzco in drag that woke me up at the end of the\n",
            "movie Ill ignore Rudy for as far thats possibleAnyway great movie just not my style and as they say\n",
            "you always have to be true to your groove\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: The problem I have as a Finn is that most of the actors in this movie are in every Finnish\n",
            "movie I have a feeling that Finland has only like five actors I think that if youre not from Finland\n",
            "you really like this movie as a refreshing noveltyThis movie is about a dreadful chain of events\n",
            "that affects a few people quite harshly Alcoholism cold climate and darkness may all be clichés but\n",
            "theyre still very real in todays Finnish society A lot of people in Finland have depression\n",
            "especially during winterThe tone of the movie is very melancholic I enjoyed it and Louhimes\n",
            "directing was again very solid I liked this movie a lot only negative thing is that you see the same\n",
            "faces that youve seen over and over again\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: Most of the criticism of Attack of Show is from people who are unfairly comparing it to an\n",
            "old computer TV program called The Screen Savers People are upset because G4 decided to cancel the\n",
            "Screen Savers and replace it with the pop culture based Attack of the Show To compare the two shows\n",
            "is like comparing apples to orangesAttack of the Show is a unique hour long program that covers\n",
            "current Generation XY culture It features segments on moviestelevision panel discussions video games\n",
            "new DVD releases sex advice new gadgets MP3 players cell phones etc comic booksgraphic novels\n",
            "magazines and internet fads Its a fun show definitely worth checking out you are in your 20s or 30s\n",
            "I give it an 8 out of 10\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: This is the best comedy period It is so underrated Clever witty humor Great casting Jerry\n",
            "Stiller is the jewel in the show he is so incredibly funny and quirky simply a comical genius Doug\n",
            "and Carrie have great chemistry I so don't see what the hype is about when it comes to Everybody\n",
            "loves Raymond it is SO overrated with lame jokes mostly forced humor and just not the witty show I\n",
            "cant remember laughing in more than 1 episode King of Queens is a rare comedy that has all the right\n",
            "ingredients to give you serious belly laughs which is normally caused by Arthur Spooner I think its\n",
            "about time this comedy gets the hype it deserves and not the lame Raymond & CO\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: Running Out of Time rests somewhere in the middle of Johnny Tos cannon in the solid good\n",
            "category As a crime thriller its not terribly original or overwhelming and the action scenes will\n",
            "not blow you away but it has something else going for it Its a Johnny To film after all it has\n",
            "toAndy Lau has 72 hours to live He decides to play a strange catandmouse game with a hostage\n",
            "negotiator of the HK police played by Lau Ching Wan Thats the plot in a nutshell On top of that To\n",
            "piles layers of twists and turns that keep proceedings interesting throughout It occasionally\n",
            "becomes too convoluted for its own sake but never lets it get the best of it However just as Johnny\n",
            "To is about to hand over a slick and wellmade crime flick which lets face it are dimeadozen he slips\n",
            "in bits and pieces that bring Running Out of Time alive as a full emotional experience providing the\n",
            "soul and heart to the welloiled skeletonThe concepts of synchronism and minimalism staples in his\n",
            "work are explored in great effect here Always subtle letting the images speak for themselves giving\n",
            "them time to develop with long takes and slow tracking shots exemplary cutting to the score its all\n",
            "here A small love story in a bus between Andy Lau and a girl is among the highlights of the film and\n",
            "part of the heart Im talking about So simple yet so powerful Ditto for Laus and Lau Ching Wans car\n",
            "scenes and the bowlingroom showdownHowever something stops me from claiming Running Out of Time is a\n",
            "masterpiece To has all the ability and craftmanship down to a notch but he can also be too\n",
            "workmanlike or bland at times When hes good hes REAL good There are even isolated moment of pure\n",
            "brilliance that are just TOO good for their own sake leaving a bittersweet aftertaste for the rest\n",
            "of the movie Im convinced that if he puts his heart to it he can make a really great film As it is\n",
            "this is another one of his films that is flawed but enjoyable Underneath the slick HK style its the\n",
            "black humour and heartfelt drama that makes this a compelling film Worth watching definitely\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: Will there be please coming an end to hyping movies that are dealing about social conflicts\n",
            "or other human disasters Okay Care is about childabuse Care is about perverts misusing boys in a\n",
            "school and how disgusting it might be if its a movie with a poor script and made with bad playing\n",
            "actors then it stays a bad movie Care is a movie that could have been but is it because it was a\n",
            "tvmovie I dont know but everything seemed so limited that it comes over as some cheap movie that\n",
            "will be seen by some housewifes and fathers who decide not to go to bed There are so many unanswered\n",
            "things in this moviethe relation with his mother for instance or the death of some abused boy from\n",
            "which we know nothing more Care should have been much much better\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n",
            "Review: This movie appears to have been overlooked by everyone Someone should bring it out on VHS\n",
            "and DVD It is an excellent film and far superior to the one with Brooke Shields which was terrible\n",
            "Jean Simmons deserves more credit than she is getting now days It would be nice if all her films\n",
            "were offered on VHS or DVD Jean Simmons was and still is a very good actress She certainly was a\n",
            "beauty In fact she is still a beauty She also has done extremely well on TV She is so much better\n",
            "than many of the actors today\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: When I heard there was to be an ABC Australian Broadcasting Corporation miniseries based on\n",
            "life in Changi WWII POW camp with a focus on elements of comedy I was deeply sceptical and somewhat\n",
            "criticalMy father had served in the second world war Such was the barbarity of the Japanese he was\n",
            "able to talk about the horrors in and around Labuan where he was stationed until only quite recently\n",
            "Along with my father I had been awarded the fortune of knowing many great men of stronger character\n",
            "and spirit than I shall ever have who had witnessed acts of unspeakable barbarity at the hands of\n",
            "the Empire of Japan and had never completely recovered The name Changi is destined to conjure\n",
            "horrific images for ages to comeBut upon viewing I was highly impressed with the cast the characters\n",
            "and the complex plotlines of this wonderful series I now regard Changi as the highlight of my week\n",
            "bear in mind I have viewed only three episodes so far I hope the remaining episodes adhere to the\n",
            "standards set by the first threeThe black humour works uncannily well however the flatulence jokes\n",
            "are a little overdone and while much of the horror has been suppressed the series comes quite close\n",
            "in relaying the undaunted spirit of the survivors who were able to later continue with their lives\n",
            "in spite of the inhibiting memoriesThe flashback format of this series will be difficult for some to\n",
            "followbut I can not think of no better way to do adequate justice to the men who suffered deep\n",
            "emotional scarring proceeding internment when painfully suppressed experiences are remembered\n",
            "sometimes years after the horrorOne of the darkest chapters of the Second World War the 20th century\n",
            "and I would go so far as to say in the history of mankind is being relayed to a new generation\n",
            "through this series and I hope it serves to relay the overwhelming adversity borne by the wartime\n",
            "generationProceeding Changi I dont think I shall ever be able to listen to the poignant tune on the\n",
            "road to Gundagai in the same way again Tune in\n",
            "\n",
            "Actual sentiment: positive\n",
            "Predicted sentiment: positive\n",
            "=====================================================================\n",
            "\n",
            "Review: I was pretty enthusiastic about seeing this movie when it came out Commercials for it made\n",
            "it look quirky and I generally like Morgan Freeman and Chris Rock and the combination of the two\n",
            "seemed like an interesting idea Sadly I was terribly disappointed with Nurse BettyPersonally Ive\n",
            "usually found that graphic violence and comedy dont go all that well together and the only directors\n",
            "that have ever combined the two successfully in my opinion are Tarantino and the Coens There isnt\n",
            "that much violence in Nurse Betty but what violence is in it made me feel kind of weird when I was\n",
            "supposed to laugh Of course for me part of the problem was also that there didnt seem to be many\n",
            "places where I was being asked toThe film doesnt much work as a drama either Renee Zellwegers Betty\n",
            "the storys protagonist is clinically insane and impossible to relate to in any real way I will say\n",
            "Zellweger acts the role quite well and Freeman Rock and Greg Kinnear all do good jobs too The\n",
            "problem is in the writing Freeman is the only person that gets to play an interesting character Its\n",
            "really too bad 310\n",
            "\n",
            "Actual sentiment: negative\n",
            "Predicted sentiment: negative\n",
            "=====================================================================\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "lATfuiHYHq_1",
        "colab_type": "text"
      },
      "source": [
        "Now predict on the entire test dataset"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "lvWQGLXhzHtn",
        "colab_type": "code",
        "outputId": "c0f5490b-2ade-4795-fa3d-1f0f1746e23c",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 66,
          "referenced_widgets": [
            "6aaf51cb9ad44c94b6a174a8768904f7",
            "51d23e1199274477a69557c74609afb2",
            "029f74818c6842d7a28af62032418880",
            "8db144e9144141779a1088c4bc000a99",
            "210517aede4f4cfab9120fdeb3d8361a",
            "df9bc2dc2b3c4fee98affdd7f5ca1ef6",
            "b684a47485af4cb1934d57cbb03a4f57",
            "942d20b134964d1d895af69938918464"
          ]
        }
      },
      "source": [
        "# Run the fine-tuned model over `dataset` in batches and collect the decoded\n",
        "# predictions (`outputs`) and the decoded reference labels (`targets`).\n",
        "loader = DataLoader(dataset, batch_size=32, num_workers=4)\n",
        "model.model.eval()\n",
        "outputs = []\n",
        "targets = []\n",
        "for batch in tqdm(loader):\n",
        "  # max_length=2 caps generation at two tokens per example\n",
        "  # (presumably the decoder start token plus one label token - TODO confirm).\n",
        "  outs = model.model.generate(input_ids=batch['source_ids'].cuda(),\n",
        "                              attention_mask=batch['source_mask'].cuda(),\n",
        "                              max_length=2)\n",
        "\n",
        "  # Strip pad/eos markers explicitly so the decoded strings are bare labels\n",
        "  # regardless of how the installed tokenizer renders special tokens by default.\n",
        "  dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]\n",
        "  target = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch[\"target_ids\"]]\n",
        "\n",
        "  outputs.extend(dec)\n",
        "  targets.extend(target)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "6aaf51cb9ad44c94b6a174a8768904f7",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ZBxEcXeWGafd",
        "colab_type": "text"
      },
      "source": [
        "Let's check if the model generates any invalid text"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Y_qylwYGXgwY",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Report the index of every prediction that is not exactly one of the two labels.\n",
        "for i, out in enumerate(outputs):\n",
        "  if out != 'positive' and out != 'negative':\n",
        "    print(i, 'detected invalid prediction')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "MpU_VkFGIgnw",
        "colab_type": "text"
      },
      "source": [
        "This is great! Our model hasn't generated any invalid predictions. Let's calculate accuracy and other metrics."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EdJcQODoOChP",
        "colab_type": "code",
        "outputId": "22fc6852-5443-43e4-d87e-5a5266ddffd9",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Share of test examples whose decoded prediction equals the decoded target.\n",
        "metrics.accuracy_score(y_true=targets, y_pred=outputs)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.94712"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 41
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "YepnSgI5OKti",
        "colab_type": "code",
        "outputId": "a2914edf-d572-4166-a886-6c0d731835e5",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 168
        }
      },
      "source": [
        "# Per-class precision / recall / F1 for the same predictions.\n",
        "print(metrics.classification_report(y_true=targets, y_pred=outputs))"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "              precision    recall  f1-score   support\n",
            "\n",
            "    negative       0.95      0.95      0.95     12500\n",
            "    positive       0.95      0.95      0.95     12500\n",
            "\n",
            "    accuracy                           0.95     25000\n",
            "   macro avg       0.95      0.95      0.95     25000\n",
            "weighted avg       0.95      0.95      0.95     25000\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UcZqrJELrRVw",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Dhqigmiw2hVh",
        "colab_type": "text"
      },
      "source": [
        "## Emotion classification\n",
        "\n",
        "While most sentiment-analysis datasets are binary, with 'positive' and 'negative' sentiments, [Elvis Saravia](https://twitter.com/omarsar0) has put together a great [dataset](https://github.com/dair-ai/emotion_dataset) for emotion recognition. The task is: given some text, classify it into one of the following six emotions:\n",
        "\n",
        "'sadness', 'joy', 'anger', 'fear', 'surprise', 'love'.\n",
        "\n",
        "Here's the [original notebook](https://colab.research.google.com/drive/1nwCE6b9PXIKhv2hvbqf1oZKIGkXMTi1X#scrollTo=pSzoz9InH0Ta) which trains a RoBERTa model to classify the text."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "0B4IhzEgO21B",
        "colab_type": "text"
      },
      "source": [
        "### Download and view data"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6eQhtsD65svj",
        "colab_type": "code",
        "outputId": "a46f0a9a-27bb-4d10-c7a3-b45b3c894526",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "# Download the three split files (test.txt, train.txt, val.txt) from Dropbox\n",
        "# into the current working directory.\n",
        "!wget https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt\n",
        "!wget https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt\n",
        "!wget https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "--2020-05-09 06:38:34--  https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt\n",
            "Resolving www.dropbox.com (www.dropbox.com)... 162.125.9.1, 2620:100:601f:1::a27d:901\n",
            "Connecting to www.dropbox.com (www.dropbox.com)|162.125.9.1|:443... connected.\n",
            "HTTP request sent, awaiting response... 301 Moved Permanently\n",
            "Location: /s/raw/ikkqxfdbdec3fuj/test.txt [following]\n",
            "--2020-05-09 06:38:34--  https://www.dropbox.com/s/raw/ikkqxfdbdec3fuj/test.txt\n",
            "Reusing existing connection to www.dropbox.com:443.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com/cd/0/inline/A3U44u6Qw37AC-ysKv8gHOrYJywB8BLj-SWf4UtgNCVbhch6g7hz1JW0yVUjMcYZGe1daItFDuwZIfhuumccC7WN93mOzuubDPQ-xL4xEAH-ugpp_5KfcQCc-l4yNej1pUo/file# [following]\n",
            "--2020-05-09 06:38:35--  https://ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com/cd/0/inline/A3U44u6Qw37AC-ysKv8gHOrYJywB8BLj-SWf4UtgNCVbhch6g7hz1JW0yVUjMcYZGe1daItFDuwZIfhuumccC7WN93mOzuubDPQ-xL4xEAH-ugpp_5KfcQCc-l4yNej1pUo/file\n",
            "Resolving ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com (ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com)... 162.125.9.6, 2620:100:601f:6::a27d:906\n",
            "Connecting to ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com (ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com)|162.125.9.6|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 206760 (202K) [text/plain]\n",
            "Saving to: ‘test.txt’\n",
            "\n",
            "\rtest.txt              0%[                    ]       0  --.-KB/s               \rtest.txt            100%[===================>] 201.91K  --.-KB/s    in 0.07s   \n",
            "\n",
            "2020-05-09 06:38:35 (2.79 MB/s) - ‘test.txt’ saved [206760/206760]\n",
            "\n",
            "--2020-05-09 06:38:37--  https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt\n",
            "Resolving www.dropbox.com (www.dropbox.com)... 162.125.9.1, 2620:100:601f:1::a27d:901\n",
            "Connecting to www.dropbox.com (www.dropbox.com)|162.125.9.1|:443... connected.\n",
            "HTTP request sent, awaiting response... 301 Moved Permanently\n",
            "Location: /s/raw/1pzkadrvffbqw6o/train.txt [following]\n",
            "--2020-05-09 06:38:38--  https://www.dropbox.com/s/raw/1pzkadrvffbqw6o/train.txt\n",
            "Reusing existing connection to www.dropbox.com:443.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com/cd/0/inline/A3VHSDKXCes7IMws7cQCAbiXyNW7dOk9CYhiTjzghv3EcDtNVLX37OVjW43i4mNdbrYOdNNhfqFlbysOgj9PUEvDo4b1Uq_2cChy-FGiz0-mNYIc07lv7AoBSphOulwSRY8/file# [following]\n",
            "--2020-05-09 06:38:38--  https://uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com/cd/0/inline/A3VHSDKXCes7IMws7cQCAbiXyNW7dOk9CYhiTjzghv3EcDtNVLX37OVjW43i4mNdbrYOdNNhfqFlbysOgj9PUEvDo4b1Uq_2cChy-FGiz0-mNYIc07lv7AoBSphOulwSRY8/file\n",
            "Resolving uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com (uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com)... 162.125.9.6, 2620:100:601f:6::a27d:906\n",
            "Connecting to uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com (uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com)|162.125.9.6|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 1658616 (1.6M) [text/plain]\n",
            "Saving to: ‘train.txt’\n",
            "\n",
            "train.txt           100%[===================>]   1.58M  --.-KB/s    in 0.1s    \n",
            "\n",
            "2020-05-09 06:38:38 (13.8 MB/s) - ‘train.txt’ saved [1658616/1658616]\n",
            "\n",
            "--2020-05-09 06:38:41--  https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt\n",
            "Resolving www.dropbox.com (www.dropbox.com)... 162.125.9.1, 2620:100:601f:1::a27d:901\n",
            "Connecting to www.dropbox.com (www.dropbox.com)|162.125.9.1|:443... connected.\n",
            "HTTP request sent, awaiting response... 301 Moved Permanently\n",
            "Location: /s/raw/2mzialpsgf9k5l3/val.txt [following]\n",
            "--2020-05-09 06:38:41--  https://www.dropbox.com/s/raw/2mzialpsgf9k5l3/val.txt\n",
            "Reusing existing connection to www.dropbox.com:443.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com/cd/0/inline/A3Uqwy1biv6Ipmcdb5FAQtm-d1nMaHHQJKrKTqu-TusvptAdDtwpPRPxMIuZovISOPSJPhwNP1imjPtokJO3KO6OlofN61eqKzGDn-P7BovjRs9wkVRJW0HjMMaz8Q5vmGU/file# [following]\n",
            "--2020-05-09 06:38:42--  https://uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com/cd/0/inline/A3Uqwy1biv6Ipmcdb5FAQtm-d1nMaHHQJKrKTqu-TusvptAdDtwpPRPxMIuZovISOPSJPhwNP1imjPtokJO3KO6OlofN61eqKzGDn-P7BovjRs9wkVRJW0HjMMaz8Q5vmGU/file\n",
            "Resolving uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com (uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com)... 162.125.9.6, 2620:100:601f:6::a27d:906\n",
            "Connecting to uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com (uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com)|162.125.9.6|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 204240 (199K) [text/plain]\n",
            "Saving to: ‘val.txt’\n",
            "\n",
            "val.txt             100%[===================>] 199.45K  --.-KB/s    in 0.07s   \n",
            "\n",
            "2020-05-09 06:38:42 (2.75 MB/s) - ‘val.txt’ saved [204240/204240]\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yVrcVbvx74G5",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Collect the downloaded splits in one folder. `-p` keeps the cell re-runnable\n",
        "# (no error if the folder already exists), and only the three dataset files are\n",
        "# moved rather than every .txt file in the working directory.\n",
        "!mkdir -p emotion_data\n",
        "!mv train.txt test.txt val.txt emotion_data"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jOpnh3Y06BGU",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Paths of the three split files inside the emotion_data folder\n",
        "train_path, test_path, val_path = (\n",
        "  \"emotion_data/train.txt\",\n",
        "  \"emotion_data/test.txt\",\n",
        "  \"emotion_data/val.txt\",\n",
        ")\n",
        "\n",
        "# Integer id for each emotion label, numbered in the order listed\n",
        "label2int = {\n",
        "  label: index\n",
        "  for index, label in enumerate(\n",
        "    [\"sadness\", \"joy\", \"love\", \"anger\", \"fear\", \"surprise\"]\n",
        "  )\n",
        "}"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "r4sDek6T8PXE",
        "colab_type": "code",
        "outputId": "a061ba43-03d8-4fdc-b715-b6fca8d57388",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 313
        }
      },
      "source": [
        "# Load the training split as a two-column frame (text, emotion). It is bound to\n",
        "# `train` because the cells that follow inspect it through that name\n",
        "# (`train.head()`, `train.count()`); binding it to another name leaves those\n",
        "# cells dependent on hidden kernel state.\n",
        "train = pd.read_csv(train_path, sep=\";\", header=None, names=['text', 'emotion'],\n",
        "                    engine=\"python\")\n",
        "# Bar chart of how many examples each emotion has\n",
        "train.emotion.value_counts().plot.bar()"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7f20bb40cfd0>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 49
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEXCAYAAABBFpRtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAWn0lEQVR4nO3df9AlVX3n8fdHEH/EH6BMKATiYJzoYlQ0I7Ar2SiEX6JCFA2uP0ZDMlUuRuO6u2JWl4piCmNKd01FVhQiuolINCoLKk5Q1qhRGBBBUMKIUMKiTBwEIwEFv/tHnwevOMPzDHPn9sw971fVU7f7dN/b32a4n9v39Om+qSokSX2439gFSJJmx9CXpI4Y+pLUEUNfkjpi6EtSRwx9SerIjmMXcG923XXXWr58+dhlSNJ25eKLL/7nqlq2sWXbdOgvX76ctWvXjl2GJG1Xkly3qWV270hSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6sk1fnHVfLT/h3Jlu79qTj5zp9iTpvvJIX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHlhT6Sa5NcnmSS5OsbW2PSLImydXtcZfWniTvSrIuyWVJnjrxOqva+lcnWbV1dkmStCmbc6T/zKrat6pWtvkTgPOragVwfpsHOAJY0f5WA6fA8CEBnAjsD+wHnLjwQSFJmo0t6d45CjijTZ8BHD3R/oEafBnYOcnuwGHAmqraUFU3A2uAw7dg+5KkzbTU0C/gM0kuTrK6te1WVTe26e8Cu7XpPYDvTDz3+ta2qfafk2R1krVJ1q5fv36J5UmSlmKpt1Y+sKpuSPLLwJok35xcWFWVpKZRUFWdCpwKsHLlyqm8piRpsKQj/aq6oT3eBHyMoU/+e63bhvZ4U1v9BmCviafv2do21S5JmpFFQz/JLyV56MI0cCjwdeBsYGEEzirgE236bOBlbRTPAcAtrRvoPODQJLu0E7iHtjZJ0owspXtnN+BjSRbW/5uq+nSSi4CzkhwHXAe8sK3/SeBZwDrgNuAVAFW1IclbgIvaem+uqg1T2xNJ0qIWDf2qugZ48kbavw8cvJH2Ao7fxGudDpy++WVKkqbBK3IlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SerIUu+9o23I8hPOnen2rj35yJluT9LW45G+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkeWHPpJdkjy1STntPm9k3wlybokH06yU2t/QJtf15Yvn3iNN7T2q5IcNu2dkSTdu8050n8N8I2J+bcB76yqxwI3A8e19uOAm1v7O9t6JNkHOBZ4AnA48O4kO2xZ+ZKkzbGk0E+yJ3Ak8L42H+Ag4CNtlTOAo9v0UW2etvzgtv5RwJlVdUdVfRtYB+w3jZ2QJC3NUo/0/wfwX4GftvlHAj+oqjvb/PXAHm16D+A7AG35LW39u9s38hxJ0gwsGvpJng3cVFUXz6AekqxOsjbJ2vXr189ik5LUjaUc6T8deG6Sa4EzGbp1/iewc5Id2zp7Aje06RuAvQDa8ocD359s38hz7lZVp1bVyqpauWzZss3eIUnSpi0a+lX1hqras6qWM5yI/WxVvRj4HHBMW20V8Ik2fXabpy3/bFVVaz+2je7ZG1gBXDi1PZEkLWrHxVfZpNcDZyY5CfgqcFprPw34YJJ1wAaGDwqq6ookZwFXAncCx1fVXVuwfUnSZtqs0K+qC4AL2vQ1bGT0TVXdDrxgE89/K/DWzS1SkjQdXpErSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xN
CXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOrJo6Cd5YJILk3wtyRVJ/qS1753kK0nWJflwkp1a+wPa/Lq2fPnEa72htV+V5LCttVOSpI1bypH+HcBBVfVkYF/g8CQHAG8D3llVjwVuBo5r6x8H3Nza39nWI8k+wLHAE4DDgXcn2WGaOyNJuneLhn4N/qXN3r/9FXAQ8JHWfgZwdJs+qs3Tlh+cJK39zKq6o6q+DawD9pvKXkiSlmRJffpJdkhyKXATsAb4FvCDqrqzrXI9sEeb3gP4DkBbfgvwyMn2jTxnclurk6xNsnb9+vWbv0eSpE1aUuhX1V1VtS+wJ8PR+eO3VkFVdWpVrayqlcuWLdtam5GkLm3W6J2q+gHwOeDfAjsn2bEt2hO4oU3fAOwF0JY/HPj+ZPtGniNJmoGljN5ZlmTnNv0g4BDgGwzhf0xbbRXwiTZ9dpunLf9sVVVrP7aN7tkbWAFcOK0dkSQtbsfFV2F34Iw20uZ+wFlVdU6SK4Ezk5wEfBU4ra1/GvDBJOuADQwjdqiqK5KcBVwJ3AkcX1V3TXd3JEn3ZtHQr6rLgKdspP0aNjL6pqpuB16widd6K/DWzS9TkjQNXpErSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHVkx7ELkO5p+QnnznR715585Ey3J43JI31J6siioZ9krySfS3JlkiuSvKa1PyLJmiRXt8ddWnuSvCvJuiSXJXnqxGutautfnWTV1tstSdLGLOVI/07gdVW1D3AAcHySfYATgPOragVwfpsHOAJY0f5WA6fA8CEBnAjsD+wHnLjwQSFJmo1FQ7+qbqyqS9r0D4FvAHsARwFntNXOAI5u00cBH6jBl4Gdk+wOHAasqaoNVXUzsAY4fKp7I0m6V5vVp59kOfAU4CvAblV1Y1v0XWC3Nr0H8J2Jp13f2jbVLkmakSWHfpKHAB8F/qiqbp1cVlUF1DQKSrI6ydoka9evXz+Nl5QkNUsK/ST3Zwj8v66qv2vN32vdNrTHm1r7DcBeE0/fs7Vtqv3nVNWpVbWyqlYuW7Zsc/ZFkrSIRcfpJwlwGvCNqnrHxKKzgVXAye3xExPtr0pyJsNJ21uq6sYk5wF/OnHy9lDgDdPZDWn74XUIGtNSLs56OvBS4PIkl7a2P2YI+7OSHAdcB7ywLfsk8CxgHXAb8AqAqtqQ5C3ARW29N1fVhqnshSRpSRYN/ar6ApBNLD54I+sXcPwmXut04PTNKVCSND1ekStJHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjqy49gFSJofy084d6bbu/bkI2e6vXngkb4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR1ZNPSTnJ7kpiRfn2h7RJI1Sa5uj7u09iR5V5J1SS5L8tSJ56xq61+dZNXW2R1J0r1ZypH++4HD79F2AnB+Va0Azm/zAEcAK9rfauAUGD4kgBOB/YH9gBMXPigkSbOzaOhX1eeBDfdoPgo4o02fARw90f6BGnwZ2DnJ7sBhwJqq2lBVNwNr+MUPEknSVnZf+/R3q6ob2/R3gd3a9B7AdybWu761bar9FyRZnWRtkrXr16+/j+VJkjZmi0/kVlUBNYVaFl7v1KpaWVUrly1bNq2XlSRx30P/e63bhvZ4U2u/AdhrYr09W9um2iVJM3RfQ/9sYGEEzirgEx
PtL2ujeA4AbmndQOcBhybZpZ3APbS1SZJmaNFbKyf5EPAMYNck1zOMwjkZOCvJccB1wAvb6p8EngWsA24DXgFQVRuSvAW4qK335qq658lhSdJWtmjoV9WLNrHo4I2sW8Dxm3id04HTN6s6SdJUeUWuJHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUkUXvvSNJGiw/4dyZbu/ak4+c+mt6pC9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSMzD/0khye5Ksm6JCfMevuS1LOZhn6SHYC/BI4A9gFelGSfWdYgST2b9ZH+fsC6qrqmqn4MnAkcNeMaJKlbqarZbSw5Bji8qn6/zb8U2L+qXjWxzmpgdZt9HHDVzAqEXYF/nuH2Zs39277N8/7N877B7Pfv0VW1bGMLdpxhEUtSVacCp46x7SRrq2rlGNueBfdv+zbP+zfP+wbb1v7NunvnBmCvifk9W5skaQZmHfoXASuS7J1kJ+BY4OwZ1yBJ3Zpp905V3ZnkVcB5wA7A6VV1xSxrWMQo3Uoz5P5t3+Z5/+Z532Ab2r+ZnsiVJI3LK3IlqSOGviR1pOvQT/KcJF3/N5DUl94D73eBq5P8WZLHj13M1pZklyRPGruOachgr8XXlDSp69CvqpcATwG+Bbw/yT8mWZ3koSOXNjVJLkjysCSPAC4B3pvkHWPXtaVqGIHwybHr2FqS7JDkm2PXsbUleXSS327TD5qz995uSU5L8qk2v0+S48auq+vQB6iqW4GPMNwHaHfgd4BLkvzhqIVNz8PbPj4P+EBV7Q/89sg1TcslSZ42dhFbQ1XdBVyV5FfGrmVrSfIHDO+997SmPYGPj1fR1L2fYXj6o9r8PwF/NFo1Tdehn+S5ST4GXADcH9ivqo4Angy8bszapmjHJLsDLwTOGbuYKdsf+Mck30pyWZLLk1w2dlFTtAtwRZLzk5y98Dd2UVN0PPB04FaAqroa+OVRK5quXavqLOCnMFynBNw1bknb4L13Zuz5wDur6vOTjVV127bwNWxK3sxwtPGFqrooyWOAq0euaVoOG7uArexNYxewld1RVT9OAkCSHYF5unDoR0keSdunJAcAt4xbkhdnkWQ3YKGL4MKqumnMerR5khwIrKiqv0qyDHhIVX177Lq0uCR/BvwAeBnwh8B/BK6sqv82amFTkuSpwF8Avw58HVgGHFNVo34b7Tr0k7wA+HOG7p0Avwn8l6r6yJh1TVN7Y50E/CvwaeBJwGur6n+PWtgUJDkRWAk8rqp+LcmjgL+tqqePXNpUtCPDvwD+DbATw61LflRVDxu1sClpw6WPAw5leP+dB7yv5iiU2reXxzHs31VV9ZORS+o+9L8GHLJwdN+OFP++qp48bmXTk+TSqto3ye8Azwb+E/D5edjHJJcyjL66pKqe0touq6p5GZa6luGmhH/L8OH2MuDXquoNoxY2JUmeB5xbVXeMXcvW0A4qP11VP0zyRuCpwElVdcmYdXV9Ihe43z26c77P/P03WThvcyTDUfDofYpT9ON2VLjQZ/pLI9czdVW1Dtihqu6qqr8CDh+7pil6DvBPST6Y5NntqHievKkF/oHAwcBpwCkj1zR3Abe5Pp3kvCQvT/JyhnHfnxq5pmk7p433/g3g/PZt5vaRa5qWs5K8B9i5Df/7e+C9I9c0Tbe1W5Bf2i4gfC1z9J6tqlcAj2X4JvMi4FtJ3jduVVO1MFLnSOC9VXUuQzfdqLru3oG7v2Iu9AH/Q1XN0zhhANqFWbdU1V3taPihVfXdseuahiSHMNEnXFVrRi5papI8GvgeQ1C8Fng48O529D83ktyf4RvMK4B/X1W7jlzSVCQ5h+FHog5h6Nr5V4bBIqN2rXYZ+km+UFUHJvkhQ9dAJhb/FNgAvL2q3j
1KgVOU5MEM/fi/UlWrk6xgOPE5b2P251KSBzH8283yt6JnIskRDLdCeQbDYIqzgM+08ezbvfbeOxy4vKqubtfLPLGqPjNqXT2G/mLa2NovVdXjxq5lSyX5MHAx8LKq+vX2P+KXqmrfkUvbYhMf2pNuAdYCr6uqa2Zf1fQkeQ7D6LKdqmrvJPsCb66q545c2lQk+RDwYeBT83QyN8nDqurW9g37F1TVhlnXNMnQ34Qku1fVjWPXsaUWfpA5yVcnRrh8beyvmNOQ5C3A9cDfMHxbOxb4VYZ7DL2yqp4xXnVbLsnFwEHABRP/dpdX1RPHrWx65vE6mSTnVNWzk3ybX+xJqKp6zEilAXN0Umja5iHwmx+3LoKFES6/CszLUdVzq+o9VfXDqrq1qk4FDquqDzPcwmB795ONjLaam6O0NqTxQuAFDLcJ+UqSY8atasu1wA/wW1X1mKrae+Jv1MAHb8PQgxMZLsraK8lfM5y0fvmoFU3PbUleyHDTLoBj+NnIpHkIxyuS/Adgh3Yu5tXAl0auaZreCDztntfJ8LN/z+1WVVWSc4Ft7luZR/pzro1meR5D0H8IWFlVF4xZ0xS9GHgpcBPDKJeXAi9p32xeNWZhWyLJB9vkt4AnMHwz+xDDjclGv0vjFM37dTLb5F1g7dPvQJI9gEcz8c3unjeZ07YjyZUMt7/+FPDMey4f+0TgtCR5O8NtQT7Umn4XuKyqXj9eVdPTro95LHAd8COGvv0a+4pxQ3/OJXkbw5vpCtotXhn+x9vuR4C07oA/AJbz8x9ovzdWTdOQ5NXAK4HHMIzzvnsR28CJwGlK8nx+/jqZj41ZzzS16yx+QVVdN+taJhn6cy7JVcCT5mlI3IIkXwL+gWFI6t33Ka+qj45W1BQlOaWqXjl2Hbrv2p02D2Q4x/TFse+7A4b+3Gs/1faCqvqXsWuZtoWbyY1dhzbPJq6vgJ99k5mXu4j+d4aRSX/Xmo5muP/VSeNVZejPvSQfZfglsPOZGKpZVa8eragpSXISw4Vmc/tbudp+tW/ZT66q29v8g4BLx77o0yGb8+/s9jePXgP8cZI7gJ8wZ0eK2u79P+CB/GwY8QP4+XM0o/BIX9u1dqn7CoY3FwBV9X/Hq0gaJPk4w9XGaxi6sw5huBjtehjv27ahP6eSXM69XKA09rCxaUjy+wxH+3sClwIHMHT3HDxqYRKQZNW9La+qM2ZVyyS7d+bXs9vj8e1x4YKflzAfV6vCEPhPA75cVc9M8njgT0euSSLJDsChVfXisWu5J0N/Ti2MBU5yyMLNuprXJ7kEOGGcyqbq9qq6PQlJHlBV30yy3d8ZVdu/9tsVj06yU1X9eOx6Jhn68y9Jnl5VX2wz/475udT9+iQ7Ax8H1iS5meHqR2lbcA3wxSRnM1yRC0BVvWO8kuzTn3tJfgM4neFXlwLcDPzetnCRyDQl+S2Gffz0tnZkpT4lOXFj7VX1J7OuZZKh34kkDweYsx9Gl7SZDP0OJDmS4W6Nk8Ma3zxeRdL8S/I5NjJooqoOGqGcu9mnP+eS/C/gwQx3a3wfwz3nLxy1KKkP/3li+oHA84HRf//XI/05l+SyqnrSxONDGH6T9DfHrk3qTZILq2q/MWvwSH/+LVwCfluSRwEbgN1HrEfqwj1+GP1+wEqGwQajMvTn3/9pwxrfzvCD4QW8d9ySpC5czM9+GP0nwLXAcWMWBPMzXlub9k3grnaP+b8Evswwrl3S1vV6YN+q2pvhivgfAbeNW5Kh34M3VdUPkxwIHMRwMveUkWuSevDGqrp1W3vvGfrzb+EXpY4E3ltV5wI7jViP1Itt8r1n6M+/G5K8h+F3cj+Z5AH47y7Nwjb53nPI5pxL8mDgcODyqro6ye7AE6vqMyOXJs21bfW9Z+hLUkdG/6ohSZodQ1+SOmLoS1JHDH1J6oihL0kd+f/K/NV+jg5JdwAAAABJRU5ErkJggg==\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EaKp3E1T8kkm",
        "colab_type": "code",
        "outputId": "7b0fa7d2-199e-4e6e-b895-1d216a1be7b8",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 195
        }
      },
      "source": [
        "# Show the first rows of the `train` frame\n",
        "train.head()"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>text</th>\n",
              "      <th>emotion</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>i didnt feel humiliated</td>\n",
              "      <td>sadness</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>i can go from feeling so hopeless to so damned...</td>\n",
              "      <td>sadness</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>im grabbing a minute to post i feel greedy wrong</td>\n",
              "      <td>anger</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>i am ever feeling nostalgic about the fireplac...</td>\n",
              "      <td>love</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>i am feeling grouchy</td>\n",
              "      <td>anger</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                                                text  emotion\n",
              "0                            i didnt feel humiliated  sadness\n",
              "1  i can go from feeling so hopeless to so damned...  sadness\n",
              "2   im grabbing a minute to post i feel greedy wrong    anger\n",
              "3  i am ever feeling nostalgic about the fireplac...     love\n",
              "4                               i am feeling grouchy    anger"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 50
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "i-Gt1WyPBL-6",
        "colab_type": "code",
        "outputId": "5ca664c8-5a05-4e8c-a15b-b66891f3e164",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 67
        }
      },
      "source": [
        "# Number of non-null entries in each column of `train`\n",
        "train.count()"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "text       16000\n",
              "emotion    16000\n",
              "dtype: int64"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 51
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KybpXVl1Die5",
        "colab_type": "code",
        "outputId": "1319d2b5-c84e-4c95-bae6-3af745326439",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 186,
          "referenced_widgets": [
            "0037bb8409bb4d65ac4ebd956fd1e631",
            "db528e3117024014b4d281b650901cbd",
            "350fc08aa59849fc9fd3f3e454583a6c",
            "be936dd408314d0d90a22f627ca517ca",
            "99f56e1a8fdb4b2282fa6e17819d044e",
            "462bd815ddbc4687bcf7695f59919f0c",
            "40edb7d92c1145ee9e3bb823e4688e16",
            "f827cd8a6bf846c590913c5ea40e6737"
          ]
        }
      },
      "source": [
        "# Load the pretrained 't5-base' tokenizer (its SentencePiece model file is\n",
        "# downloaded on first use and read from the local cache afterwards)\n",
        "tokenizer = T5Tokenizer.from_pretrained('t5-base')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:filelock:Lock 139955425093728 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n",
            "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpsnkx0l2g\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "0037bb8409bb4d65ac4ebd956fd1e631",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n",
            "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n",
            "INFO:filelock:Lock 139955425093728 released on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n",
            "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cANrUEXhO8QY",
        "colab_type": "text"
      },
      "source": [
        "### Dataset"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "8GsMQdqMPCN7",
        "colab_type": "text"
      },
      "source": [
        "Here also we will process the examples in the same way we did above. If the label is 'love' we will ask the model to predict the text 'love'"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "AKh6m92eKZc4",
        "colab_type": "text"
      },
      "source": [
        "Let's check how T5 encodes the following labels"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HDnMp5-fDIAc",
        "colab_type": "code",
        "outputId": "837d1d28-2d17-4ff0-f345-64eed6949dbb",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 118
        }
      },
      "source": [
        "emotions = [\"sadness\", \"joy\", \"love\", \"anger\", \"fear\", \"surprise\"]\n",
        "\n",
        "# Print how many token ids the tokenizer produces for each label\n",
        "for em in emotions:\n",
        "  n_token_ids = len(tokenizer.encode(em))\n",
        "  print(n_token_ids)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "1\n",
            "1\n",
            "1\n",
            "1\n",
            "1\n",
            "1\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "i8VIZIWFOwMj",
        "colab_type": "text"
      },
      "source": [
        "Here also all the labels are encoded as single ids"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8i8QD-3MDrWq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "class EmotionDataset(Dataset):\n",
        "  \"\"\"Emotion examples read from a ';'-separated file and tokenized up front.\n",
        "\n",
        "  The file `<data_dir>/<type_path>.txt` is parsed into a `text` and an `emotion`\n",
        "  column. Every row is tokenized once, during construction; indexing afterwards\n",
        "  only looks up the stored tensors.\n",
        "  \"\"\"\n",
        "\n",
        "  def __init__(self, tokenizer, data_dir, type_path, max_len=512):\n",
        "    \"\"\"Read the split file and tokenize all of its rows.\n",
        "\n",
        "    Args:\n",
        "      tokenizer: object providing `batch_encode_plus`.\n",
        "      data_dir: folder that contains the split files.\n",
        "      type_path: split name; the file read is `<type_path>.txt`.\n",
        "      max_len: `max_length` handed to the tokenizer for the input text.\n",
        "    \"\"\"\n",
        "    self.tokenizer = tokenizer\n",
        "    self.max_len = max_len\n",
        "    self.data_column = \"text\"\n",
        "    self.class_column = \"emotion\"\n",
        "\n",
        "    self.path = os.path.join(data_dir, type_path + '.txt')\n",
        "    self.data = pd.read_csv(\n",
        "        self.path,\n",
        "        sep=\";\",\n",
        "        header=None,\n",
        "        names=[self.data_column, self.class_column],\n",
        "        engine=\"python\",\n",
        "    )\n",
        "\n",
        "    # Filled by _build(): one tokenizer result per row, in file order\n",
        "    self.inputs = []\n",
        "    self.targets = []\n",
        "    self._build()\n",
        "\n",
        "  def __len__(self):\n",
        "    \"\"\"Number of tokenized examples.\"\"\"\n",
        "    return len(self.inputs)\n",
        "\n",
        "  def __getitem__(self, index):\n",
        "    \"\"\"Return ids and attention masks of example `index` as a dict of tensors.\"\"\"\n",
        "    encoded_source = self.inputs[index]\n",
        "    encoded_target = self.targets[index]\n",
        "\n",
        "    # squeeze() removes size-1 dimensions (presumably the batch dimension of the\n",
        "    # one-element batch that was tokenized -- confirm against the tokenizer output)\n",
        "    return {\n",
        "        \"source_ids\": encoded_source[\"input_ids\"].squeeze(),\n",
        "        \"source_mask\": encoded_source[\"attention_mask\"].squeeze(),\n",
        "        \"target_ids\": encoded_target[\"input_ids\"].squeeze(),\n",
        "        \"target_mask\": encoded_target[\"attention_mask\"].squeeze(),\n",
        "    }\n",
        "\n",
        "  def _build(self):\n",
        "    \"\"\"Tokenize every (text, emotion) row and store the results.\"\"\"\n",
        "    texts = self.data[self.data_column]\n",
        "    labels = self.data[self.class_column]\n",
        "\n",
        "    for text, label in zip(texts, labels):\n",
        "      # Both sequences get the ' </s>' suffix before tokenization\n",
        "      source_text = text + ' </s>'\n",
        "      target_text = label + \" </s>\"\n",
        "\n",
        "      encoded_source = self.tokenizer.batch_encode_plus(\n",
        "          [source_text], max_length=self.max_len, pad_to_max_length=True, return_tensors=\"pt\"\n",
        "      )\n",
        "      # Targets are encoded with a fixed max_length of 2\n",
        "      encoded_target = self.tokenizer.batch_encode_plus(\n",
        "          [target_text], max_length=2, pad_to_max_length=True, return_tensors=\"pt\"\n",
        "      )\n",
        "\n",
        "      self.inputs.append(encoded_source)\n",
        "      self.targets.append(encoded_target)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "kRz5jyl3FBkv",
        "colab_type": "code",
        "outputId": "b3587087-efa7-400b-f3f4-ebc958deb33d",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Build the dataset for the 'val' split and report how many examples it holds\n",
        "dataset = EmotionDataset(\n",
        "    tokenizer,\n",
        "    data_dir='emotion_data',\n",
        "    type_path='val',\n",
        "    max_len=512,\n",
        ")\n",
        "len(dataset)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "2000"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 54
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jxT6QzUAFQN0",
        "colab_type": "code",
        "outputId": "68122a3a-bf3e-4125-f768-a6410abed5a9",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 50
        }
      },
      "source": [
        "# Decode one example back to text to eyeball the encoded input and target\n",
        "data = dataset[42]\n",
        "for field in ('source_ids', 'target_ids'):\n",
        "  print(tokenizer.decode(data[field]))"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "i honestly don't feel discouraged today as i usually do\n",
            "sadness\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PBVHtdIuFpID",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "DEWi6c-pGZV9",
        "colab_type": "text"
      },
      "source": [
        "### Train"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "wGrpDJnLPQ0Q",
        "colab_type": "text"
      },
      "source": [
        "As I said above, there's no need to change the model, add a task-specific head, or change any other hyperparameters; we'll just change the dataset, and that's it!"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "kDep-uIcGYX2",
        "colab": {}
      },
      "source": [
        "# Create the folder used as output_dir for this run (-p: no error if it already exists)\n",
        "!mkdir -p t5_emotion"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TgNOy7a4LJ9h",
        "colab_type": "code",
        "outputId": "3945df44-55d0-40d2-d98c-fa196bb9d554",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        }
      },
      "source": [
        "# Reuse the existing argument dict, setting only the data/output folders and the\n",
        "# epoch count (update() changes args_dict in place)\n",
        "args_dict.update(\n",
        "    data_dir='emotion_data',\n",
        "    output_dir='t5_emotion',\n",
        "    num_train_epochs=2,\n",
        ")\n",
        "args = argparse.Namespace(**args_dict)\n",
        "print(args_dict)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "{'data_dir': 'emotion_data', 'output_dir': 't5_emotion', 'model_name_or_path': 't5-base', 'tokenizer_name_or_path': 't5-base', 'max_seq_length': 512, 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'warmup_steps': 0, 'train_batch_size': 8, 'eval_batch_size': 8, 'num_train_epochs': 2, 'gradient_accumulation_steps': 16, 'n_gpu': 1, 'early_stop_callback': False, 'fp_16': False, 'opt_level': 'O1', 'max_grad_norm': 1.0, 'seed': 42}\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "at783kr7KvS4",
        "colab": {}
      },
      "source": [
        "# Checkpointing: monitor val_loss (mode \"min\", save_top_k=5), with the output\n",
        "# folder as the file path and \"checkpoint\" as the file-name prefix\n",
        "checkpoint_callback = pl.callbacks.ModelCheckpoint(\n",
        "    filepath=args.output_dir,\n",
        "    prefix=\"checkpoint\",\n",
        "    monitor=\"val_loss\",\n",
        "    mode=\"min\",\n",
        "    save_top_k=5,\n",
        ")\n",
        "\n",
        "# Training settings gathered in one dict\n",
        "# NOTE(review): presumably unpacked into the Lightning trainer in a later cell -- confirm\n",
        "train_params = {\n",
        "    \"accumulate_grad_batches\": args.gradient_accumulation_steps,\n",
        "    \"gpus\": args.n_gpu,\n",
        "    \"max_epochs\": args.num_train_epochs,\n",
        "    \"early_stop_callback\": False,\n",
        "    \"precision\": 16 if args.fp_16 else 32,\n",
        "    \"amp_level\": args.opt_level,\n",
        "    \"gradient_clip_val\": args.max_grad_norm,\n",
        "    \"checkpoint_callback\": checkpoint_callback,\n",
        "    \"callbacks\": [LoggingCallback()],\n",
        "}"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "1LBvpP01KvTA",
        "colab": {}
      },
      "source": [
        "def get_dataset(tokenizer, type_path, args):\n",
        "  \"\"\"Return an EmotionDataset for `type_path`.\n",
        "\n",
        "  The dataset is configured with `args.data_dir` as its data directory and\n",
        "  `args.max_seq_length` as its `max_len`; `tokenizer` is passed through unchanged.\n",
        "  \"\"\"\n",
        "  emotion_dataset = EmotionDataset(\n",
        "      tokenizer=tokenizer,\n",
        "      data_dir=args.data_dir,\n",
        "      type_path=type_path,\n",
        "      max_len=args.max_seq_length,\n",
        "  )\n",
        "  return emotion_dataset"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "v3Tty_OHGlvR",
        "colab_type": "code",
        "outputId": "0423fedb-7a93-4990-c6ce-545b52b86e63",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 978
        }
      },
      "source": [
        "# Build a fresh T5FineTuner from the emotion-task `args` set above.\n",
        "model = T5FineTuner(args)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n",
            "INFO:transformers.configuration_utils:Model config T5Config {\n",
            "  \"architectures\": [\n",
            "    \"T5WithLMHeadModel\"\n",
            "  ],\n",
            "  \"d_ff\": 3072,\n",
            "  \"d_kv\": 64,\n",
            "  \"d_model\": 768,\n",
            "  \"decoder_start_token_id\": 0,\n",
            "  \"dropout_rate\": 0.1,\n",
            "  \"eos_token_id\": 1,\n",
            "  \"initializer_factor\": 1.0,\n",
            "  \"is_encoder_decoder\": true,\n",
            "  \"layer_norm_epsilon\": 1e-06,\n",
            "  \"model_type\": \"t5\",\n",
            "  \"n_positions\": 512,\n",
            "  \"num_heads\": 12,\n",
            "  \"num_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": 0,\n",
            "  \"relative_attention_num_buckets\": 32,\n",
            "  \"task_specific_params\": {\n",
            "    \"summarization\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"length_penalty\": 2.0,\n",
            "      \"max_length\": 200,\n",
            "      \"min_length\": 30,\n",
            "      \"no_repeat_ngram_size\": 3,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"summarize: \"\n",
            "    },\n",
            "    \"translation_en_to_de\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"max_length\": 300,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"translate English to German: \"\n",
            "    },\n",
            "    \"translation_en_to_fr\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"max_length\": 300,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"translate English to French: \"\n",
            "    },\n",
            "    \"translation_en_to_ro\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"max_length\": 300,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"translate English to Romanian: \"\n",
            "    }\n",
            "  },\n",
            "  \"vocab_size\": 32128\n",
            "}\n",
            "\n",
            "INFO:transformers.modeling_utils:loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n",
            "INFO:transformers.modeling_utils:Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\n",
            "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mIsW9pwEG27D",
        "colab_type": "code",
        "outputId": "d0469592-9403-4397-c8cf-b2b4c48ba614",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 50
        }
      },
      "source": [
        "# Create the Lightning trainer from the `train_params` assembled above.\n",
        "trainer = pl.Trainer(**train_params)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:lightning:GPU available: True, used: True\n",
            "INFO:lightning:CUDA_VISIBLE_DEVICES: [0]\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xmk4GsEMHTfZ",
        "colab_type": "code",
        "outputId": "ba492b59-fc67-4fd3-d42a-5965600679df",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "61d58772a6a64c5c8ad30dab2563a56f",
            "4000e73e6d804763986dc9a9c74456aa",
            "0dd99276ab294c939d83320f4674d5c2",
            "d306f7ff1ec94561aeed9ff59ba9b54b",
            "0893a9730450433fa76a74b008a6f482",
            "f8873c7201e1410cb0ec52cb7e34c3c9",
            "234eb8b041c44358b2f993c2853162f7",
            "8f73da698e85474fbecfd91bb7770c56",
            "26a0cb124049417aa9dbdd010e3af03a",
            "8a14bd8f2a424b15b48426fd5e320678",
            "09ed6242c5ef4a4791a1074ff7e4616e",
            "487a6ea92fe0463ebbcb63094fde5136",
            "c050be8414044acdb1a496495d148302",
            "56a67d534f284df0bc1121f1e264f5e2",
            "f168c4ae2d014e89bacc58e43427302e",
            "5cabe7d5ed6b46be882c558d28a29ca2",
            "1681a9ce7f9340caa50c4204777a6f9e",
            "a9f0c66f958e493286155c8d2631d255",
            "e04d6312d5d4425ab726588c485e668c",
            "fab8ee7d5d3940819eb9131efbbad791",
            "6dd2781f88eb4549b4203dfec9c1a98e",
            "893ba880ac6545baa6eb4a532ecc5753",
            "d4fc7ae628c94a758ce694318bc620ba",
            "4c33ca548b5e4738abdac09575e2a325",
            "ff475d6cdc074c14aa7b2cfede771b07",
            "d77faf8b9ea6480abe594114823ca52f",
            "ee4f41b591fe41a5a2d915c343b16c1d",
            "d8946214acc44c4cb97688538daaa33f",
            "9b9306452732495cbb1acd3e2fcf3b69",
            "f42e9e596ad0485b842fee92d1884750",
            "1d9f8718ba4d4b60997757ea7f1db72b",
            "63db466ae63b42a5a79d051ef5af653e"
          ]
        }
      },
      "source": [
        "# Fine-tune the model; the stored log below begins with Lightning's per-module parameter summary.\n",
        "trainer.fit(model)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:lightning:\n",
            "    | Name                                                                  | Type                       | Params\n",
            "-----------------------------------------------------------------------------------------------------------------\n",
            "0   | model                                                                 | T5ForConditionalGeneration | 222 M \n",
            "1   | model.shared                                                          | Embedding                  | 24 M  \n",
            "2   | model.encoder                                                         | T5Stack                    | 109 M \n",
            "3   | model.encoder.block                                                   | ModuleList                 | 84 M  \n",
            "4   | model.encoder.block.0                                                 | T5Block                    | 7 M   \n",
            "5   | model.encoder.block.0.layer                                           | ModuleList                 | 7 M   \n",
            "6   | model.encoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "7   | model.encoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "8   | model.encoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "9   | model.encoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "10  | model.encoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "11  | model.encoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "12  | model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \n",
            "13  | model.encoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "14  | model.encoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \n",
            "15  | model.encoder.block.0.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "16  | model.encoder.block.0.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "17  | model.encoder.block.0.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "18  | model.encoder.block.0.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "19  | model.encoder.block.0.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "20  | model.encoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "21  | model.encoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \n",
            "22  | model.encoder.block.1                                                 | T5Block                    | 7 M   \n",
            "23  | model.encoder.block.1.layer                                           | ModuleList                 | 7 M   \n",
            "24  | model.encoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "25  | model.encoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "26  | model.encoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "27  | model.encoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "28  | model.encoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "29  | model.encoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "30  | model.encoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "31  | model.encoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \n",
            "32  | model.encoder.block.1.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "33  | model.encoder.block.1.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "34  | model.encoder.block.1.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "35  | model.encoder.block.1.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "36  | model.encoder.block.1.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "37  | model.encoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "38  | model.encoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \n",
            "39  | model.encoder.block.2                                                 | T5Block                    | 7 M   \n",
            "40  | model.encoder.block.2.layer                                           | ModuleList                 | 7 M   \n",
            "41  | model.encoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "42  | model.encoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "43  | model.encoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "44  | model.encoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "45  | model.encoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "46  | model.encoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "47  | model.encoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "48  | model.encoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \n",
            "49  | model.encoder.block.2.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "50  | model.encoder.block.2.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "51  | model.encoder.block.2.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "52  | model.encoder.block.2.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "53  | model.encoder.block.2.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "54  | model.encoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "55  | model.encoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \n",
            "56  | model.encoder.block.3                                                 | T5Block                    | 7 M   \n",
            "57  | model.encoder.block.3.layer                                           | ModuleList                 | 7 M   \n",
            "58  | model.encoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "59  | model.encoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "60  | model.encoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "61  | model.encoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "62  | model.encoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "63  | model.encoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "64  | model.encoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "65  | model.encoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \n",
            "66  | model.encoder.block.3.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "67  | model.encoder.block.3.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "68  | model.encoder.block.3.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "69  | model.encoder.block.3.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "70  | model.encoder.block.3.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "71  | model.encoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "72  | model.encoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \n",
            "73  | model.encoder.block.4                                                 | T5Block                    | 7 M   \n",
            "74  | model.encoder.block.4.layer                                           | ModuleList                 | 7 M   \n",
            "75  | model.encoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "76  | model.encoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "77  | model.encoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "78  | model.encoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "79  | model.encoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "80  | model.encoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "81  | model.encoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "82  | model.encoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \n",
            "83  | model.encoder.block.4.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "84  | model.encoder.block.4.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "85  | model.encoder.block.4.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "86  | model.encoder.block.4.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "87  | model.encoder.block.4.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "88  | model.encoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "89  | model.encoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \n",
            "90  | model.encoder.block.5                                                 | T5Block                    | 7 M   \n",
            "91  | model.encoder.block.5.layer                                           | ModuleList                 | 7 M   \n",
            "92  | model.encoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "93  | model.encoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "94  | model.encoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "95  | model.encoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "96  | model.encoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "97  | model.encoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "98  | model.encoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "99  | model.encoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \n",
            "100 | model.encoder.block.5.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "101 | model.encoder.block.5.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "102 | model.encoder.block.5.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "103 | model.encoder.block.5.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "104 | model.encoder.block.5.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "105 | model.encoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "106 | model.encoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \n",
            "107 | model.encoder.block.6                                                 | T5Block                    | 7 M   \n",
            "108 | model.encoder.block.6.layer                                           | ModuleList                 | 7 M   \n",
            "109 | model.encoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "110 | model.encoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "111 | model.encoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "112 | model.encoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "113 | model.encoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "114 | model.encoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "115 | model.encoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "116 | model.encoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \n",
            "117 | model.encoder.block.6.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "118 | model.encoder.block.6.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "119 | model.encoder.block.6.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "120 | model.encoder.block.6.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "121 | model.encoder.block.6.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "122 | model.encoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "123 | model.encoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \n",
            "124 | model.encoder.block.7                                                 | T5Block                    | 7 M   \n",
            "125 | model.encoder.block.7.layer                                           | ModuleList                 | 7 M   \n",
            "126 | model.encoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "127 | model.encoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "128 | model.encoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "129 | model.encoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "130 | model.encoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "131 | model.encoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "132 | model.encoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "133 | model.encoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \n",
            "134 | model.encoder.block.7.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "135 | model.encoder.block.7.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "136 | model.encoder.block.7.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "137 | model.encoder.block.7.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "138 | model.encoder.block.7.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "139 | model.encoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "140 | model.encoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \n",
            "141 | model.encoder.block.8                                                 | T5Block                    | 7 M   \n",
            "142 | model.encoder.block.8.layer                                           | ModuleList                 | 7 M   \n",
            "143 | model.encoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "144 | model.encoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "145 | model.encoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "146 | model.encoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "147 | model.encoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "148 | model.encoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "149 | model.encoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "150 | model.encoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \n",
            "151 | model.encoder.block.8.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "152 | model.encoder.block.8.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "153 | model.encoder.block.8.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "154 | model.encoder.block.8.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "155 | model.encoder.block.8.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "156 | model.encoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "157 | model.encoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \n",
            "158 | model.encoder.block.9                                                 | T5Block                    | 7 M   \n",
            "159 | model.encoder.block.9.layer                                           | ModuleList                 | 7 M   \n",
            "160 | model.encoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "161 | model.encoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "162 | model.encoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "163 | model.encoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "164 | model.encoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "165 | model.encoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "166 | model.encoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "167 | model.encoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \n",
            "168 | model.encoder.block.9.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "169 | model.encoder.block.9.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "170 | model.encoder.block.9.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "171 | model.encoder.block.9.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "172 | model.encoder.block.9.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "173 | model.encoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "174 | model.encoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \n",
            "175 | model.encoder.block.10                                                | T5Block                    | 7 M   \n",
            "176 | model.encoder.block.10.layer                                          | ModuleList                 | 7 M   \n",
            "177 | model.encoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "178 | model.encoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "179 | model.encoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "180 | model.encoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "181 | model.encoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "182 | model.encoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "183 | model.encoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "184 | model.encoder.block.10.layer.0.dropout                                | Dropout                    | 0     \n",
            "185 | model.encoder.block.10.layer.1                                        | T5LayerFF                  | 4 M   \n",
            "186 | model.encoder.block.10.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "187 | model.encoder.block.10.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "188 | model.encoder.block.10.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "189 | model.encoder.block.10.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "190 | model.encoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "191 | model.encoder.block.10.layer.1.dropout                                | Dropout                    | 0     \n",
            "192 | model.encoder.block.11                                                | T5Block                    | 7 M   \n",
            "193 | model.encoder.block.11.layer                                          | ModuleList                 | 7 M   \n",
            "194 | model.encoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "195 | model.encoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "196 | model.encoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "197 | model.encoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "198 | model.encoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "199 | model.encoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "200 | model.encoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "201 | model.encoder.block.11.layer.0.dropout                                | Dropout                    | 0     \n",
            "202 | model.encoder.block.11.layer.1                                        | T5LayerFF                  | 4 M   \n",
            "203 | model.encoder.block.11.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "204 | model.encoder.block.11.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "205 | model.encoder.block.11.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "206 | model.encoder.block.11.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "207 | model.encoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "208 | model.encoder.block.11.layer.1.dropout                                | Dropout                    | 0     \n",
            "209 | model.encoder.final_layer_norm                                        | T5LayerNorm                | 768   \n",
            "210 | model.encoder.dropout                                                 | Dropout                    | 0     \n",
            "211 | model.decoder                                                         | T5Stack                    | 137 M \n",
            "212 | model.decoder.block                                                   | ModuleList                 | 113 M \n",
            "213 | model.decoder.block.0                                                 | T5Block                    | 9 M   \n",
            "214 | model.decoder.block.0.layer                                           | ModuleList                 | 9 M   \n",
            "215 | model.decoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "216 | model.decoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "217 | model.decoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "218 | model.decoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "219 | model.decoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "220 | model.decoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "221 | model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \n",
            "222 | model.decoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "223 | model.decoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \n",
            "224 | model.decoder.block.0.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "225 | model.decoder.block.0.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "226 | model.decoder.block.0.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "227 | model.decoder.block.0.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "228 | model.decoder.block.0.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "229 | model.decoder.block.0.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "230 | model.decoder.block.0.layer.1.EncDecAttention.relative_attention_bias | Embedding                  | 384   \n",
            "231 | model.decoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "232 | model.decoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \n",
            "233 | model.decoder.block.0.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "234 | model.decoder.block.0.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "235 | model.decoder.block.0.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "236 | model.decoder.block.0.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "237 | model.decoder.block.0.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "238 | model.decoder.block.0.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "239 | model.decoder.block.0.layer.2.dropout                                 | Dropout                    | 0     \n",
            "240 | model.decoder.block.1                                                 | T5Block                    | 9 M   \n",
            "241 | model.decoder.block.1.layer                                           | ModuleList                 | 9 M   \n",
            "242 | model.decoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "243 | model.decoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "244 | model.decoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "245 | model.decoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "246 | model.decoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "247 | model.decoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "248 | model.decoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "249 | model.decoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \n",
            "250 | model.decoder.block.1.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "251 | model.decoder.block.1.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "252 | model.decoder.block.1.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "253 | model.decoder.block.1.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "254 | model.decoder.block.1.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "255 | model.decoder.block.1.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "256 | model.decoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "257 | model.decoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \n",
            "258 | model.decoder.block.1.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "259 | model.decoder.block.1.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "260 | model.decoder.block.1.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "261 | model.decoder.block.1.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "262 | model.decoder.block.1.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "263 | model.decoder.block.1.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "264 | model.decoder.block.1.layer.2.dropout                                 | Dropout                    | 0     \n",
            "265 | model.decoder.block.2                                                 | T5Block                    | 9 M   \n",
            "266 | model.decoder.block.2.layer                                           | ModuleList                 | 9 M   \n",
            "267 | model.decoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "268 | model.decoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "269 | model.decoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "270 | model.decoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "271 | model.decoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "272 | model.decoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "273 | model.decoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "274 | model.decoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \n",
            "275 | model.decoder.block.2.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "276 | model.decoder.block.2.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "277 | model.decoder.block.2.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "278 | model.decoder.block.2.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "279 | model.decoder.block.2.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "280 | model.decoder.block.2.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "281 | model.decoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "282 | model.decoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \n",
            "283 | model.decoder.block.2.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "284 | model.decoder.block.2.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "285 | model.decoder.block.2.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "286 | model.decoder.block.2.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "287 | model.decoder.block.2.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "288 | model.decoder.block.2.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "289 | model.decoder.block.2.layer.2.dropout                                 | Dropout                    | 0     \n",
            "290 | model.decoder.block.3                                                 | T5Block                    | 9 M   \n",
            "291 | model.decoder.block.3.layer                                           | ModuleList                 | 9 M   \n",
            "292 | model.decoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "293 | model.decoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "294 | model.decoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "295 | model.decoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "296 | model.decoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "297 | model.decoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "298 | model.decoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "299 | model.decoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \n",
            "300 | model.decoder.block.3.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "301 | model.decoder.block.3.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "302 | model.decoder.block.3.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "303 | model.decoder.block.3.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "304 | model.decoder.block.3.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "305 | model.decoder.block.3.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "306 | model.decoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "307 | model.decoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \n",
            "308 | model.decoder.block.3.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "309 | model.decoder.block.3.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "310 | model.decoder.block.3.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "311 | model.decoder.block.3.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "312 | model.decoder.block.3.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "313 | model.decoder.block.3.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "314 | model.decoder.block.3.layer.2.dropout                                 | Dropout                    | 0     \n",
            "315 | model.decoder.block.4                                                 | T5Block                    | 9 M   \n",
            "316 | model.decoder.block.4.layer                                           | ModuleList                 | 9 M   \n",
            "317 | model.decoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "318 | model.decoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "319 | model.decoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "320 | model.decoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "321 | model.decoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "322 | model.decoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "323 | model.decoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "324 | model.decoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \n",
            "325 | model.decoder.block.4.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "326 | model.decoder.block.4.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "327 | model.decoder.block.4.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "328 | model.decoder.block.4.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "329 | model.decoder.block.4.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "330 | model.decoder.block.4.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "331 | model.decoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "332 | model.decoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \n",
            "333 | model.decoder.block.4.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "334 | model.decoder.block.4.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "335 | model.decoder.block.4.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "336 | model.decoder.block.4.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "337 | model.decoder.block.4.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "338 | model.decoder.block.4.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "339 | model.decoder.block.4.layer.2.dropout                                 | Dropout                    | 0     \n",
            "340 | model.decoder.block.5                                                 | T5Block                    | 9 M   \n",
            "341 | model.decoder.block.5.layer                                           | ModuleList                 | 9 M   \n",
            "342 | model.decoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "343 | model.decoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "344 | model.decoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "345 | model.decoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "346 | model.decoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "347 | model.decoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "348 | model.decoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "349 | model.decoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \n",
            "350 | model.decoder.block.5.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "351 | model.decoder.block.5.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "352 | model.decoder.block.5.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "353 | model.decoder.block.5.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "354 | model.decoder.block.5.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "355 | model.decoder.block.5.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "356 | model.decoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "357 | model.decoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \n",
            "358 | model.decoder.block.5.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "359 | model.decoder.block.5.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "360 | model.decoder.block.5.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "361 | model.decoder.block.5.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "362 | model.decoder.block.5.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "363 | model.decoder.block.5.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "364 | model.decoder.block.5.layer.2.dropout                                 | Dropout                    | 0     \n",
            "365 | model.decoder.block.6                                                 | T5Block                    | 9 M   \n",
            "366 | model.decoder.block.6.layer                                           | ModuleList                 | 9 M   \n",
            "367 | model.decoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "368 | model.decoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "369 | model.decoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "370 | model.decoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "371 | model.decoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "372 | model.decoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "373 | model.decoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "374 | model.decoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \n",
            "375 | model.decoder.block.6.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "376 | model.decoder.block.6.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "377 | model.decoder.block.6.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "378 | model.decoder.block.6.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "379 | model.decoder.block.6.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "380 | model.decoder.block.6.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "381 | model.decoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "382 | model.decoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \n",
            "383 | model.decoder.block.6.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "384 | model.decoder.block.6.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "385 | model.decoder.block.6.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "386 | model.decoder.block.6.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "387 | model.decoder.block.6.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "388 | model.decoder.block.6.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "389 | model.decoder.block.6.layer.2.dropout                                 | Dropout                    | 0     \n",
            "390 | model.decoder.block.7                                                 | T5Block                    | 9 M   \n",
            "391 | model.decoder.block.7.layer                                           | ModuleList                 | 9 M   \n",
            "392 | model.decoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "393 | model.decoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "394 | model.decoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "395 | model.decoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "396 | model.decoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "397 | model.decoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "398 | model.decoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "399 | model.decoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \n",
            "400 | model.decoder.block.7.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "401 | model.decoder.block.7.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "402 | model.decoder.block.7.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "403 | model.decoder.block.7.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "404 | model.decoder.block.7.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "405 | model.decoder.block.7.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "406 | model.decoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "407 | model.decoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \n",
            "408 | model.decoder.block.7.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "409 | model.decoder.block.7.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "410 | model.decoder.block.7.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "411 | model.decoder.block.7.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "412 | model.decoder.block.7.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "413 | model.decoder.block.7.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "414 | model.decoder.block.7.layer.2.dropout                                 | Dropout                    | 0     \n",
            "415 | model.decoder.block.8                                                 | T5Block                    | 9 M   \n",
            "416 | model.decoder.block.8.layer                                           | ModuleList                 | 9 M   \n",
            "417 | model.decoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "418 | model.decoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "419 | model.decoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "420 | model.decoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "421 | model.decoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "422 | model.decoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "423 | model.decoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "424 | model.decoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \n",
            "425 | model.decoder.block.8.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "426 | model.decoder.block.8.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "427 | model.decoder.block.8.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "428 | model.decoder.block.8.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "429 | model.decoder.block.8.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "430 | model.decoder.block.8.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "431 | model.decoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "432 | model.decoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \n",
            "433 | model.decoder.block.8.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "434 | model.decoder.block.8.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "435 | model.decoder.block.8.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "436 | model.decoder.block.8.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "437 | model.decoder.block.8.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "438 | model.decoder.block.8.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "439 | model.decoder.block.8.layer.2.dropout                                 | Dropout                    | 0     \n",
            "440 | model.decoder.block.9                                                 | T5Block                    | 9 M   \n",
            "441 | model.decoder.block.9.layer                                           | ModuleList                 | 9 M   \n",
            "442 | model.decoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "443 | model.decoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "444 | model.decoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "445 | model.decoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "446 | model.decoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "447 | model.decoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "448 | model.decoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "449 | model.decoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \n",
            "450 | model.decoder.block.9.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "451 | model.decoder.block.9.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "452 | model.decoder.block.9.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "453 | model.decoder.block.9.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "454 | model.decoder.block.9.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "455 | model.decoder.block.9.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "456 | model.decoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "457 | model.decoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \n",
            "458 | model.decoder.block.9.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "459 | model.decoder.block.9.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "460 | model.decoder.block.9.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "461 | model.decoder.block.9.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "462 | model.decoder.block.9.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "463 | model.decoder.block.9.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "464 | model.decoder.block.9.layer.2.dropout                                 | Dropout                    | 0     \n",
            "465 | model.decoder.block.10                                                | T5Block                    | 9 M   \n",
            "466 | model.decoder.block.10.layer                                          | ModuleList                 | 9 M   \n",
            "467 | model.decoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "468 | model.decoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "469 | model.decoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "470 | model.decoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "471 | model.decoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "472 | model.decoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "473 | model.decoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "474 | model.decoder.block.10.layer.0.dropout                                | Dropout                    | 0     \n",
            "475 | model.decoder.block.10.layer.1                                        | T5LayerCrossAttention      | 2 M   \n",
            "476 | model.decoder.block.10.layer.1.EncDecAttention                        | T5Attention                | 2 M   \n",
            "477 | model.decoder.block.10.layer.1.EncDecAttention.q                      | Linear                     | 589 K \n",
            "478 | model.decoder.block.10.layer.1.EncDecAttention.k                      | Linear                     | 589 K \n",
            "479 | model.decoder.block.10.layer.1.EncDecAttention.v                      | Linear                     | 589 K \n",
            "480 | model.decoder.block.10.layer.1.EncDecAttention.o                      | Linear                     | 589 K \n",
            "481 | model.decoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "482 | model.decoder.block.10.layer.1.dropout                                | Dropout                    | 0     \n",
            "483 | model.decoder.block.10.layer.2                                        | T5LayerFF                  | 4 M   \n",
            "484 | model.decoder.block.10.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "485 | model.decoder.block.10.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "486 | model.decoder.block.10.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "487 | model.decoder.block.10.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "488 | model.decoder.block.10.layer.2.layer_norm                             | T5LayerNorm                | 768   \n",
            "489 | model.decoder.block.10.layer.2.dropout                                | Dropout                    | 0     \n",
            "490 | model.decoder.block.11                                                | T5Block                    | 9 M   \n",
            "491 | model.decoder.block.11.layer                                          | ModuleList                 | 9 M   \n",
            "492 | model.decoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "493 | model.decoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "494 | model.decoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "495 | model.decoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "496 | model.decoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "497 | model.decoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "498 | model.decoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "499 | model.decoder.block.11.layer.0.dropout                                | Dropout                    | 0     \n",
            "500 | model.decoder.block.11.layer.1                                        | T5LayerCrossAttention      | 2 M   \n",
            "501 | model.decoder.block.11.layer.1.EncDecAttention                        | T5Attention                | 2 M   \n",
            "502 | model.decoder.block.11.layer.1.EncDecAttention.q                      | Linear                     | 589 K \n",
            "503 | model.decoder.block.11.layer.1.EncDecAttention.k                      | Linear                     | 589 K \n",
            "504 | model.decoder.block.11.layer.1.EncDecAttention.v                      | Linear                     | 589 K \n",
            "505 | model.decoder.block.11.layer.1.EncDecAttention.o                      | Linear                     | 589 K \n",
            "506 | model.decoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "507 | model.decoder.block.11.layer.1.dropout                                | Dropout                    | 0     \n",
            "508 | model.decoder.block.11.layer.2                                        | T5LayerFF                  | 4 M   \n",
            "509 | model.decoder.block.11.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "510 | model.decoder.block.11.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "511 | model.decoder.block.11.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "512 | model.decoder.block.11.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "513 | model.decoder.block.11.layer.2.layer_norm                             | T5LayerNorm                | 768   \n",
            "514 | model.decoder.block.11.layer.2.dropout                                | Dropout                    | 0     \n",
            "515 | model.decoder.final_layer_norm                                        | T5LayerNorm                | 768   \n",
            "516 | model.decoder.dropout                                                 | Dropout                    | 0     \n",
            "517 | model.lm_head                                                         | Linear                     | 24 M  \n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "61d58772a6a64c5c8ad30dab2563a56f",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\r"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "26a0cb124049417aa9dbdd010e3af03a",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "1681a9ce7f9340caa50c4204777a6f9e",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:***** Validation results *****\n",
            "INFO:__main__:avg_val_loss = tensor(0.0846, device='cuda:0')\n",
            "\n",
            "INFO:__main__:loss = tensor(0.0290, device='cuda:0')\n",
            "\n",
            "INFO:__main__:train_loss = tensor(0.0290, device='cuda:0')\n",
            "\n",
            "INFO:__main__:val_loss = tensor(0.0846, device='cuda:0')\n",
            "\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "ff475d6cdc074c14aa7b2cfede771b07",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:***** Validation results *****\n",
            "INFO:__main__:avg_train_loss = tensor(0.5601, device='cuda:0')\n",
            "\n",
            "INFO:__main__:avg_val_loss = tensor(0.0696, device='cuda:0')\n",
            "\n",
            "INFO:__main__:epoch = 0\n",
            "\n",
            "INFO:__main__:loss = tensor(0.0134, device='cuda:0')\n",
            "\n",
            "INFO:__main__:train_loss = tensor(0.0134, device='cuda:0')\n",
            "\n",
            "INFO:__main__:val_loss = tensor(0.0696, device='cuda:0')\n",
            "\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "1"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 70
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "GwdWdHG0RP5J",
        "colab_type": "text"
      },
      "source": [
        "### Eval"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "dq7cCiOPRQzs",
        "colab": {}
      },
      "source": [
        "import textwrap\n",
        "from tqdm.auto import tqdm\n",
        "from sklearn import metrics"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "XKsHzqGMRQzz",
        "colab": {}
      },
      "source": [
        "# Build the test split. 512 is passed as the fourth positional argument; the batch shape\n",
        "# displayed below is [32, 512], so it presumably sets the padded sequence length (confirm in EmotionDataset).\n",
        "dataset = EmotionDataset(tokenizer, 'emotion_data', 'test', 512)\n",
        "# shuffle=True makes the single batch inspected below a random draw of 32 test examples.\n",
        "loader = DataLoader(dataset, batch_size=32, shuffle=True)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "QK7s7IpERQz5",
        "colab": {}
      },
      "source": [
        "# Create an iterator over the shuffled loader so batches can be pulled one at a time with next().\n",
        "it = iter(loader)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "outputId": "a49604ae-31da-49bc-9a90-bb5bd1366ebf",
        "id": "5_79Jk36RQz-",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Pull one batch from the iterator and display the shape of its input-id tensor.\n",
        "batch = next(it)\n",
        "batch[\"source_ids\"].shape"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "torch.Size([32, 512])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 74
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "RQZKyEaVRQ0B",
        "colab": {}
      },
      "source": [
        "# Switch the underlying model to eval mode before generating, as the test-metrics cell\n",
        "# further down does; otherwise dropout may still be active after training.\n",
        "model.model.eval()\n",
        "\n",
        "# max_length=2 caps each generated sequence at two token ids.\n",
        "outs = model.model.generate(input_ids=batch['source_ids'].cuda(), \n",
        "                              attention_mask=batch['source_mask'].cuda(), \n",
        "                              max_length=2)\n",
        "\n",
        "# skip_special_tokens=True keeps pad/eos markers out of the decoded strings, so the padded\n",
        "# inputs and the generated labels come back as clean text regardless of tokenizer version.\n",
        "dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]\n",
        "\n",
        "texts = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch['source_ids']]\n",
        "targets = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch['target_ids']]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "outputId": "93cdd40b-310f-458d-e5ae-21debf158a39",
        "id": "aAjhiBcrRQ0E",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "# Walk the decoded batch itself instead of a hard-coded range(32), so a batch holding\n",
        "# fewer than 32 examples does not raise IndexError and a larger one is printed in full.\n",
        "for text, target, pred in zip(texts, targets, dec):\n",
        "    # Wrap long inputs at 100 columns to keep the printed report readable.\n",
        "    lines = textwrap.wrap(\"text:\\n%s\\n\" % text, width=100)\n",
        "    print(\"\\n\".join(lines))\n",
        "    print(\"\\nActual sentiment: %s\" % target)\n",
        "    print(\"predicted sentiment: %s\" % pred)\n",
        "    print(\"=====================================================================\\n\")"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "text: i feel like ive come a long way and im proud of what ive achieved not only this week but this\n",
            "year as well\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: i feel unfathomably rich in having had a healthy pregnancy so far\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: im just feeling emo and bitchy atm\n",
            "\n",
            "Actual sentiment: anger\n",
            "predicted sentiment: anger\n",
            "=====================================================================\n",
            "\n",
            "text: i always feel troubled when we re on the road touring living in a van or more recently in the\n",
            "circus buses no place to hang my hat as the song lyric has it\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i still feel confused and guilty about the whole thing\n",
            "\n",
            "Actual sentiment: fear\n",
            "predicted sentiment: fear\n",
            "=====================================================================\n",
            "\n",
            "text: i feel immensely distracted by the barrage of media i receive solicit\n",
            "\n",
            "Actual sentiment: anger\n",
            "predicted sentiment: anger\n",
            "=====================================================================\n",
            "\n",
            "text: im feeling too tortured to write today\n",
            "\n",
            "Actual sentiment: fear\n",
            "predicted sentiment: anger\n",
            "=====================================================================\n",
            "\n",
            "text: i have the joy of allowing kids to feel like the valued treasures that they are and to just\n",
            "have a blast being a kid alongside with them but can i just say its an incredibly humbling\n",
            "experience to have influence into a childs life and to know that what you do and say is being\n",
            "internalized\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: i dont want flowers or candy but the kind of guy that knows i like thinly sliced limes in my\n",
            "mineral water because it makes me feel glamorous and is humored by how pretentious that is\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: i just was expressing myself and her unexpected and kind gesture made me feel bad for a short\n",
            "moment as that was not my intent but for a larger moment which remains with me it reminded me of my\n",
            "blessings like having good friends that have your back\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: im feeling brave ill snatch him to on my lap and after a few seconds of struggling he\n",
            "completely relaxes and submits to mommy scratches\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: im sick of feeling unimportant like nobody needs me\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i feel like these unfortunate events fit in with my thought quote i posted above\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i feel like they don t think it s sincere when it really is she told us exclusively\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: i feel a little low about being in japan and i always feel pangs of guilt when i fail to\n",
            "appreciate my living situation and decisions\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i couldn t help but feel pissed off at both sides of the debate and the unnecessary dichotomy\n",
            "itself\n",
            "\n",
            "Actual sentiment: anger\n",
            "predicted sentiment: anger\n",
            "=====================================================================\n",
            "\n",
            "text: i felt so bad for the bad grade and feeling like having to hide it that i didnt know what to\n",
            "say except to declare in all my frustration that i hated school\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i do feel proud and happy and also very grateful to all who read me\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: i am going to have to check on in just a few minutes but there is this clock up above the\n",
            "screen that keeps ticking down the minutes i have left so am feeling a bit frantic\n",
            "\n",
            "Actual sentiment: fear\n",
            "predicted sentiment: fear\n",
            "=====================================================================\n",
            "\n",
            "text: i am feeling bitchy this evening\n",
            "\n",
            "Actual sentiment: anger\n",
            "predicted sentiment: anger\n",
            "=====================================================================\n",
            "\n",
            "text: i feel like my room is messy if theyre open\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: im starting to feel really pathetic giving the bulk of my enthusiasm these days to the\n",
            "kardashians us weekly and roseanne marathons and completely ignoring this blog\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i am feeling content and happy with myself\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: i feel slightly saddened to know that some of the kids have also resigned during my absence\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i feel that passionate about\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: i too feel a sense of melancholy for them\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i can t quite figure out how i feel i m not devastated like i was with lucy and i m not sure\n",
            "if that s because it s easier to do after the first time or what\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i feel ashamed of you\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i like the fresh feeling of sweet he gave me\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n",
            "text: i feel so jaded and bored\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i feel fake because i think if you really want to have a good conversation and make good\n",
            "contact you have to appear especially self confident and even risk talking to some people which are\n",
            "no good to talk to at all until you meet one person which you have a good connection to\n",
            "\n",
            "Actual sentiment: sadness\n",
            "predicted sentiment: sadness\n",
            "=====================================================================\n",
            "\n",
            "text: i am feeling pretty fearless\n",
            "\n",
            "Actual sentiment: joy\n",
            "predicted sentiment: joy\n",
            "=====================================================================\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "iq8M8nbTSJlE",
        "colab_type": "text"
      },
      "source": [
        "#### Test Metrics"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "S-oIXmoCR6kl",
        "colab_type": "code",
        "outputId": "98bdff55-aa82-45a3-dc13-be0e78e52ea9",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 66,
          "referenced_widgets": [
            "8933ab7f935e4776970ddfe35f5da135",
            "84eb2bf17a9048fc94b6f47867d1b0ba",
            "cdd7554792cf4c73922e2f050d1fcaaf",
            "a32aa193a82f478387c14f384c2c689e",
            "e4cbd76c110541cbbf1386e299c4d9d6",
            "da67548f1abc4727965f72b8cb367681",
            "63b11aa7ee0c4271aedb87ad3e7d23c3",
            "720b90b3f86c4e5da15447777806e9a7"
          ]
        }
      },
      "source": [
        "# Rebuild the test loader without shuffling and collect predictions for the whole split.\n",
        "dataset = EmotionDataset(tokenizer, 'emotion_data', 'test', 512)\n",
        "loader = DataLoader(dataset, batch_size=32, num_workers=4)\n",
        "# Eval mode turns dropout off so the generated labels are deterministic.\n",
        "model.model.eval()\n",
        "outputs = []\n",
        "targets = []\n",
        "for batch in tqdm(loader):\n",
        "  # max_length=2 caps each generated sequence at two token ids.\n",
        "  outs = model.model.generate(input_ids=batch['source_ids'].cuda(), \n",
        "                              attention_mask=batch['source_mask'].cuda(), \n",
        "                              max_length=2)\n",
        "\n",
        "  # Predictions and targets are compared as plain strings by the metric cells below, so\n",
        "  # strip pad/eos markers explicitly instead of relying on the tokenizer version to drop them.\n",
        "  dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]\n",
        "  target = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch[\"target_ids\"]]\n",
        "  \n",
        "  outputs.extend(dec)\n",
        "  targets.extend(target)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "8933ab7f935e4776970ddfe35f5da135",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "C9CYCGM6SRzb",
        "colab": {}
      },
      "source": [
        "# Sanity check: report the index of every decoded prediction that is not in `emotions`.\n",
        "for i, out in enumerate(outputs):\n",
        "  if out in emotions:\n",
        "    continue\n",
        "  print(i, 'detected invalid prediction')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "outputId": "24a4fe9c-3396-4364-aad3-8da50d456618",
        "id": "iE0WX_GbSRzq",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Exact-match accuracy of the decoded predictions against the decoded targets.\n",
        "metrics.accuracy_score(y_true=targets, y_pred=outputs)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.929"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 82
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "outputId": "01a97ad3-3c70-43b6-e6a4-55ea5ccfa010",
        "id": "mWkOZ7BASRz5",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 235
        }
      },
      "source": [
        "# Per-class precision / recall / F1 for the same decoded predictions.\n",
        "print(metrics.classification_report(y_true=targets, y_pred=outputs))"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "              precision    recall  f1-score   support\n",
            "\n",
            "       anger       0.94      0.93      0.93       275\n",
            "        fear       0.86      0.92      0.89       224\n",
            "         joy       0.97      0.93      0.95       695\n",
            "        love       0.79      0.89      0.84       159\n",
            "     sadness       0.97      0.96      0.97       581\n",
            "    surprise       0.75      0.74      0.75        66\n",
            "\n",
            "    accuracy                           0.93      2000\n",
            "   macro avg       0.88      0.90      0.89      2000\n",
            "weighted avg       0.93      0.93      0.93      2000\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "W6p9MGb6lWL5",
        "colab_type": "text"
      },
      "source": [
        "Now let's plot the confusion matrix and see which classes our model confuses with each other."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9RtgfuzucFeN",
        "colab_type": "code",
        "outputId": "0dc41da4-f99e-4469-8d0c-f055d4a18a8d",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 70
        }
      },
      "source": [
        "import seaborn as sn\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
            "  import pandas.util.testing as tm\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2ioVvq5rcHZE",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Pin the class order explicitly so the matrix is always 6x6 and its rows/columns stay in\n",
        "# this order, even if a class is missing or the model generates an unexpected string.\n",
        "cm = metrics.confusion_matrix(\n",
        "    targets,\n",
        "    outputs,\n",
        "    labels=[\"anger\", \"fear\", \"joy\", \"love\", \"sadness\", \"surprise\"],\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4rM5XS09SSdm",
        "colab_type": "code",
        "outputId": "171788f5-4c43-485c-b84a-133ad78e2486",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 462
        }
      },
      "source": [
        "# Class names in alphabetical order, matching the row/column order of `cm`.\n",
        "class_names = [\"anger\", \"fear\", \"joy\", \"love\", \"sadness\", \"surprise\"]\n",
        "\n",
        "# scikit-learn puts the true labels on the rows and the predictions on the columns.\n",
        "df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)\n",
        "plt.figure(figsize = (10,7))\n",
        "ax = sn.heatmap(df_cm, annot=True, cmap='Purples', fmt='g')\n",
        "ax.set_xlabel('Predicted label')\n",
        "ax.set_ylabel('True label')\n",
        "# The trailing semicolon suppresses the text repr of the last expression.\n",
        "ax.set_title('Confusion matrix on the emotion test set');"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:numexpr.utils:NumExpr defaulting to 4 threads.\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7f213a4498d0>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 86
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiYAAAGbCAYAAADwcltwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdeZgU1fX/8feZGdZBhn1AHWRVf4qSKBpxRxQVUBnZXCIuCEkElCRKNOCSRIMxi4p+VcANN1xwQ0WIigqKCkgUNCSCRBSEAVkFRGDm/P7oAgdlZgrsnqru+bx46pmq21Vdp4vu6tP33rpl7o6IiIhIHGRFHYCIiIjIdkpMREREJDaUmIiIiEhsKDERERGR2FBiIiIiIrGRk+odTJ78iS77CaHzSa2jDiFtZGVZ1CGIiJQrO6dyT1Qn2nVJ+659w/8Y6UlWNSYiIiISGymvMREREZHUMsucmmTVmIiIiEhsqMZEREQk3WVOhYkSExERkXRnGXRRgJpyREREJDZUYyIiIpLmMqjvqxITERGRtJdBmYmackRERCQ2VGMiIiKS5jKowkSJiYiISLrTVTkiIiIiKaAaExERkXSXQW05SkxERETSXAblJWrKERERkfhQjYmIiEiay6S7CysxERERSXeZk5eoKUdERETiQzUmIiIiaS6TxjFRYiIiIpLmMqiLiZpyREREJD5UYyIiIpLuMqjKRImJiIhImsugvERNOSIiIhKemdUzswlm9h8zm29mHc2sgZm9YmYLgr/1g3XNzEaZ2UIzm2tmh1X0/EpMRERE0pxlWdKmEG4HJrv7gUB7YD5wNfCau7cFXguWAU4H2gbTQODuip5ciYmIiEi6M0veVO5uLA84HrgPwN23uPta4CxgXLDaOKBHMH8W8JAnvAvUM7Nm5e1DiYmIiIjsYGYDzWx2qWlgqYdbAiuBB8zsX2Z2r5nlAvnuvixYZzmQH8zvA3xRavslQVmZMrLz65o1K3nkkVv5+uu1mEHHjqdx4oln8vLLj/HOO1OoUycPgG7d+nHwwR1YtaqIkSMvo0mTxLHab78D6Nt3UJQvIRIjrh3OtGlv0qBBA557diIAd9wxiqmvTyUry2jQoCE33fhnmjRpEnGk8bFs2TKuueZqvlq1CjPo07sPF1zQL+qwYmn69OmMvPnPFBeX0KtnLwYMGBB1SLF18imdyc3NJSsrm5ycbJ56ckLUIcWS3lPfSWbnV3cfA4wp4+Ec4DBgiLu/Z2a3812zzfbt3cx8T/efkYlJVlY2PXpcQkFBGzZv3sTf/vZrDjzwJwCceOJZnHTS2T/YpmHDpgwbNqqyQ42VHmcVct655/P74d+9xy6++BKGDLkcgEcefZi777mL66+7IaII4ycnJ5thw4Zx0EEHs3HjRnr17knHjkfTpk2bqEOLleLiYm686U/cO/Y+8vPz6du3D506ddJxKseDD4yjfv36UYcRW3pP7awSb+K3BFji7u8FyxNIJCZFZtbM3ZcFTTUrgseXAgWltt83KCtThU05QY/agorWi5O8vAYUFCTenDVr1iY/v4C1a1dFHFX8dejQgby8vJ3K6tSps2P+m2++yag7WCZD48ZNOOiggwHIzc2lVavWrFhRFHFU8TNv3lyaFzSnoKCA6tWrc3rXrkx9fWrUYUka03sqGu6+HPjCzA4IijoD/wYmAhcGZRcCzwfzE4F+QS5xFLCuVJPPLlVYYxJUyUwCDtmD1xC5VauKWLLkU1q0OID//W8+06e/xMyZr9O8eRt69OhP7dqJL97Vq4u45ZYrqFmzFt26XUDr1gdHHHl83D7qNiZOnMhee9Xh/vsejDqc2Fq6dCnz58/n0EPbRx1K7BQVraBps6Y7lpvm5zN37twII4o3M+PSAf0xM/r07kufPn2iDil29J76nsr9zTgEeNTMqgOLgItJVHQ8aWb9gcXA9jftJKArsBDYFK
xbrrCdX+eY2RFhIy7dcWbSpCfCbpZ03377DfffP5Kzzx5AzZq1OeaY07n22jEMG3Y7devW57nn7gMSNSw33HA/w4bdTmHhpTz00N/YvHlTZHHHzRWXD+W1V6fSrVt3Hhv/aNThxNLGjRu5YujlXHP11TvVMonsiUcefpSnJzzD6HvGMH78Y8yePSvqkCTmKvNyYXf/wN07uPuh7t7D3de4+yp37+zubd39ZHdfHazr7j7I3Vu7+yHuPrui5w+bmPwMeMfMPg0GSJlnZmWmpu4+Jgi6Q9eufUPuIrmKi7dx//0j6dDhRNq3PxqAunXrk5WVTVZWFh07nsrixZ8AkJNTjdzcugAUFLShUaOmrFhRbhNYldS9W3deffWVqMOIna1btzJ06BV073YGp5zSJepwYik/vwnLly3fsby8qIgm+fnlbFG15QfHpmHDhnQ++WTmzpsXcUTxo/fU91gSp4iFTUxOBVoDJwFnAN2Dv7Hk7owfP4r8/AI6deqxo3zdutU75ufOfYdmzfYDYMOGdZSUFAPw1VfLWbnySxo2bIrA4sWf7ZifOnUqLVu2ii6YGHJ3rr1uBK1ateKiiy6KOpzYatfuEBZ/vpglS5awZcsWXp40iU6dOkUdVixt2rSJjRs37pifMeNt2rZpG3FU8aP3VOYKdVWOuy82s2OBtu7+gJk1BmJbX71o0b+ZNet1mjVrwS23JK4o6datH3PmvMnSpf8DjIYNm9CnT+KS4IULP+Lllx8lOzsn0abbZxC5uXtF+AqicdWwK5k1ayZr166lc+dOXDZoMNOnT+Ozz/6HWRZ777031117fdRhxsqcOXOYOHEi+++/P4VnFwIwdOhQTjj+hIgji5ecnByGDx/BgIGXUlJSQmHh2fqyLcOqVau4/PIhAGwr3ka3bt057rjjIo4qfvSe2lkmXZhg7hVfamxm1wMdgAPcfX8z2xt4yt2PqWjbyZM/2eNrmauSzie1jjqEtJEVbshkEZHIZOdU7omqR/O/Je279rnPr4z0JBu2KacQOBPYCODuXwJVr0pBREREUirsAGtbSo/kFgw/KyIiInGQQTeYCZuYPGlmo0ncfGcAcAkwNnVhiYiISFiZ1MckbOfXv5nZKcB64ADgOnfXdaMiIiKSVKHvlRMkIkpGREREYiaDKkzCJSZm9jXw/R6/64DZwG/dfVGyAxMREZGQMigzCVtjchuJOwo+RmJcuHNIDLg2B7gfODEVwYmIiEjVEjYxOdPdS9+ZbIyZfeDuvzOz36ciMBEREQkngypMQicmm8ysDzAhWO4FbA7mNYCaiIhIhMLcfC9dhL3y+XzgAmAFUBTM/9zMagGDUxSbiIiIVDFhLxdeRNk37XsreeGIiIjIbsugtpywV+U0BgYALUpv4+6XpCYsERERCSuD8pLQfUyeB6YDrwLFqQtHREREqrKwiUltd/9dSiMRERGRPZJJQ9KH7fz6opl1TWkkIiIismeykjhFLGwIV5BITr4xs/Vm9rWZrU9lYCIiIlL1hL0qZy8zawC0BWqmNiQRERHZHZnUlBP2qpxLSdSa7At8ABwFzAA6py40ERERCSOTEpPdaco5Aljs7p2An5K4iZ+IiIhI0oS9Kmezu282M8yshrv/x8wOSGlkIiIiEorFoNNqsoRNTJaYWT3gOeAVM1sDLE5dWCIiIhJaBjXlhO38WhjM3mBmrwN5wOSURSUiIiJVUtgakx3c/c1UBCIiIiJ7JoMqTHY/MREREZF4sazMyUwyqLuMiIiIpDvVmIiIiKS7DGrLSXlicsrJbVK9i4zw/pylUYeQNg4/bJ+oQxCpkkpKPOoQ0kZ2Je8vg/ISNeWIiIhIfKgpR0REJM1lUudXJSYiIiLpLoPactSUIyIiIrGhGhMREZE0l0EVJkpMRERE0l0m9TFRU46IiIjEhmpMRERE0l3mVJgoMREREUl3lkGdTNSUIyIiIrGhGhMREZE0l0mdX5WYiIiIpLkMaslRU46IiIjEh2
pMRERE0l0GVZkoMREREUlzmdTHRE05IiIiEhuqMREREUlzGdSSo8REREQk7WVQZqKmHBEREYkNJSYiIiJpzsySNoXY12dmNs/MPjCz2UFZAzN7xcwWBH/rB+VmZqPMbKGZzTWzwyp6fiUmIiIiac6ykjeF1Mndf+LuHYLlq4HX3L0t8FqwDHA60DaYBgJ3V/TESkxERETkxzoLGBfMjwN6lCp/yBPeBeqZWbPynkiJiYiISLozS9pkZgPNbHapaeD39ubAP83s/VKP5bv7smB+OZAfzO8DfFFq2yVBWZl0VY6IiEiaS+ZFOe4+BhhTzirHuvtSM2sCvGJm//ne9m5mvqf7V2IiIiKS5ipz5Fd3Xxr8XWFmzwJHAkVm1szdlwVNNSuC1ZcCBaU23zcoK5OackRERCQUM8s1s722zwNdgI+AicCFwWoXAs8H8xOBfsHVOUcB60o1+eySakxERETSXeUNsJYPPBtcVpwDPObuk81sFvCkmfUHFgN9gvUnAV2BhcAm4OKKdlClEpPhI4bz5ptv0KBBAyY+/0LU4URu1aoVjB79F9atW4OZ0alTN0499Ww2bFjPnXfeyFdfFdGoUT5DhlxLbu5evPTSE8yYMRWA4uJivvzyc+66awJ16tSN+JVEa/r06Yy8+c8UF5fQq2cvBgwYEHVIsaTjFN7Jp3QmNzeXrKxscnKyeerJCVGHFAsjrh3OtGlv0qBBA557diIAU6ZM5q67/49FixYxfvwTtDu4XcRRRqOy8hJ3XwS030X5KqDzLsodGLQ7+6hSiUlhjx6cf955XH3N1RWvXAVkZ2dz3nm/pEWLtnzzzSauu+5XtGt3ONOmTeHgg3/KGWecywsvjOeFFx7nnHMG0K1bX7p16wvAnDnvMHny01U+KSkuLubGm/7EvWPvIz8/n759+9CpUyfatGkTdWixouO0+x58YBz169ePOoxY6XFWIeedez6/H/7dObxN27bcduso/vDHG6ILTJKqSvUx6dDhCPLy6kUdRmzUq9eQFi3aAlCrVm323rs5q1d/xZw5MzjuuC4AHHdcF95//+0fbPvuu1Pp2LFTpcYbR/PmzaV5QXMKCgqoXr06p3ftytTXp0YdVuzoOEkydOjQgby8vJ3KWrdqTcuWLSOKKD4sy5I2Ra3cxMTMsr9/GZBkppUrl7N48ULatDmQ9evXUK9eQwDy8hqwfv2andb99tvNzJ07myOOOC6KUGOlqGgFTZs13bHcND+fFUVFEUYUTzpOu8fMuHRAf3r17smTTz4ZdTiSDpI4jknUyk1M3L0Y+K+ZNd+dJy09OMvYseVdCi1xsHnzN4wa9QfOP/8yatXK3emxRAennd+o//rXO7Rte3CVb8YRSZVHHn6Upyc8w+h7xjB+/GPMnj0r6pBEKk2YPib1gY/NbCawcXuhu59Z1galB2cp3layx4OsSOpt27aNUaNu4OijO++oAalbtz5r166iXr2GrF27irp1d27+evfdN9SME8jPb8LyZct3LC8vKqJJfn45W1RNOk67Jz84Ng0bNqTzySczd948OnQ4IuKoJM5iUNGRNGH6mFwLdAf+CPy91CRpzt25996/sffe+3H66b12lB92WEemT/8nANOn/5PDDjt6x2ObNm3gP/+Zu1NZVdau3SEs/nwxS5YsYcuWLbw8aRKdOilp+z4dp/A2bdrExo0bd8zPmPE2bdu0jTgqibtM6mNSYY2Ju79ZGYFUhiuv/C0zZ81k7dq1dDrpRAYPGkzPnr0q3C5TffLJR7z99qsUFLRk+PBfANC79yV0734Od955I2++OZlGjZowePC1O7aZPftt2rU7nJo1a0UVdqzk5OQwfPgIBgy8lJKSEgoLz9aXyC7oOIW3atUqLr98CADbirfRrVt3jjtO/bkArhp2JbOCc3jnzp24bNBg8vLyGPnnm1i9ZjWXXfYrDjzwQMaMHht1qPIjWOIS43JWSIzUdgfw/4DqQDaw0d1DdTBQU044788pd4
ReKeXww8q9/5OIpEiJTuehVaueXalVD0N6P5a0/5w7njov0mqTMH1M7gTOAZ4COgD9gP1TGZSIiIjshuhbYJIm1Dgm7r4QyHb3Ynd/ADgttWGJiIhIVRSmxmSTmVUHPjCzW4BlVLGB2UREROIsDp1WkyVMgnFBsN5gEpcLFwA9UxmUiIiIhGdmSZuiFuaqnMVmVgto5u5/qISYREREpIqqsMbEzM4APgAmB8s/MbOJqQ5MREREQsqy5E1Rv5QQ69wAHAmsBXD3DwDdMUlERCQmMuhWOaESk63uvu57ZbqYXURERJIuzFU5H5vZeUC2mbUFLgdmpDYsERERCSsOnVaTpcwaEzN7OJj9FDgY+BYYD6wHhqY+NBEREQklg/qYlFdjcriZ7Q30BTqx8437agObUxmYiIiIVD3lJSb3AK8BrYDZpcqNRB+TVimMS0RERELKoJacshMTdx8FjDKzu939V5UYk4iIiOyGKjXyq5ISERERqSxhrsoRERGROMugthwlJiIiImmuSlwuLCIiIlLZVGMiIiKS5iyDqhmUmIiIiKQ5NeWIiIiIpIBqTERERNJdBtWYKDERERFJc5nUxySDXoqIiIikO9WYiIiIpLlM6vyqxERERCTdVaV75YiIiIhUFtWYiIiIpDk15UjSHX7YPlGHkDY6V7sh6hDSwssbRkQdQlqoXkOnwbCyMqi5INNkUF6iphwRERGJD/1UEBERSXcZVJulxERERCTNZVIfEzXliIiISGyoxkRERCTNZVCFiRITERGRtJdBfUzUlCMiIiKxoRoTERGRNJdJnV+VmIiIiKQ5U1OOiIiISPKpxkRERCTdZU6FiRITERGRdJdJfUzUlCMiIiKxoRoTERGRNKfOryIiIhIbZpa0KeT+ss3sX2b2YrDc0szeM7OFZvaEmVUPymsEywuDx1tU9NxKTERERNKdJXEK5wpgfqnlvwC3unsbYA3QPyjvD6wJym8N1iuXEhMREREJzcz2BboB9wbLBpwETAhWGQf0CObPCpYJHu9sFVTLKDERERFJc8lsyjGzgWY2u9Q08Hu7uw0YBpQEyw2Bte6+LVheAuwTzO8DfAEQPL4uWL9M6vwqIiKS5pJ5tbC7jwHG7Ho/1h1Y4e7vm9mJydvrd5SYiIiISFjHAGeaWVegJlAXuB2oZ2Y5Qa3IvsDSYP2lQAGwxMxygDxgVXk7UFOOiIhImjNL3lQed7/G3fd19xbAOcBUdz8feB3oFax2IfB8MD8xWCZ4fKq7e3n7UI2JiIhImovByK+/Ax43sxuBfwH3BeX3AQ+b2UJgNYlkplxKTERERGS3ufsbwBvB/CLgyF2ssxnovTvPq8REREQkzUVfYZI8SkxERETSXAyacpJGnV9FREQkNlRjIiIikuYyqMKkaiUm3377Lf36XcCWLVvYVryNLl1OZcjgIVGHFUvTp09n5M1/pri4hF49ezFgwICoQ4pUnbyaXHXvWbRs1wR3+Mslz/Hvd78AoM9vjuayv5/GWY1uZt2qTQD85IQWDL7tdLKrZbPuq00MPfH+KMOPxLfffssll17E1i1b2FZczMmdT+GyXw3ivffe5dbb/0FJSQm1a9fmjzfcSPPmzaMONzZ0ngrv5FM6k5ubS1ZWNjk52Tz15ISKN8pQmdSUU6USk+rVq3P//Q+Qm5vL1q1b+fkFP+f4446jffufRB1arBQXF3PjTX/i3rH3kZ+fT9++fejUqRNt2rSJOrTIDL79dGZOXsD1vZ8gp1o2NWtXA6DxvnXp0KUNyxev3bFunbyaDL2rO8NOe5gVX6yjXuPcqMKOVPXq1Rk7+j5q167N1q1bubj/hRx7zLHcNPJGbvvHKFq1asUTTz7O2PtG86c/3BR1uLGh89TuefCBcdSvXz/qMCSJKuxjYmZnmFlG9EUxM3JzE18S27ZtY9u2rZlV/5Uk8+bNpXlBcwoKCqhevTqnd+3K1NenRh1WZHLr1qD98S146b45AGzbWsyGdZsBGH
zr6YweNgVKjRfU+bxDmP7MfFZ8sQ6AtSs3Vn7QMWBm1K5dG9j+edu2414cGzduAGDDhg00btQkyjBjR+cp2ROVNcBaZQhTY9IXuM3Mngbud/f/pDimlCouLqZX7158/vnnnHfuubQ/tH3UIcVOUdEKmjZrumO5aX4+c+fOjTCiaDVrWZ+1Kzdy9QOFtG7flE/e/5I7rpjE4Se3ZuXS9Xw6t2in9Qv2b0R2tSxue/1iau1Vg6dvf4d/PvxhRNFHq7i4mHPP78sXX3xO3z7ncMghh3L9tTcw+PLLqFGjBnVy6/DQuEejDjN2dJ4Kx8y4dEB/zIw+vfvSp0+fqEOKjBGDjCJJKqwJcfefAz8FPgUeNLN3gjsP7lXWNqXvTDh27C7vAxSZ7Oxsnn3mWV6f+jrz5s1jwYJPog5JYi47J4v9D2vG83fPYsBhd/PNxi1cdEMnzv/98Txw3Q9rkrJzsjjg8L25utsjDDv1IfpdeyL7ti33ZpoZKzs7mycfn8CUya/y0ccfsXDhAh559GHuHHUX/5z8Gmee2YO//+OvUYcZOzpPhfPIw4/y9IRnGH3PGMaPf4zZs2dFHZIkQagmGndfD0wAHgeaAYXAHDPbZY8sdx/j7h3cvcOAAd+/W3I81K1blyOPPJLpb70VdSixk5/fhOXLlu9YXl5URJP8/AgjitbKJetZuWQ982cuAeDNCf+m7WF706xlPe778DIe/9+vabxvXcbM+SUN8uuwcsl6Zk5ZyOZNW1m3ahMfTvuM1u2bVrCXzFZ3r7oc0eEI3nr7LT5Z8F8OOeRQAE7tchoffvhBxNHFl85T5csPzksNGzak88knM3fevIgjik4mNeWE6WNyppk9S2LY2WrAke5+OtAe+G1qw0uu1atXs379egA2b97MjHfeoVXLlhFHFT/t2h3C4s8Xs2TJErZs2cLLkybRqVOnqMOKzOqiDaz4Yj0F+ydqPQ7v3IoFc76kMP8Wzml5K+e0vJWVS9Yz8LB7WF20gbeen88hx+5HdnYWNWpV46Cf7cvn81dG/Coq3+o1q1n/9Xeft3fffZdWLVuxYcMGFi/+DIB333uHli1bRRhl/Og8Fc6mTZvYuHHjjvkZM96mbZu2EUcVnUxKTML0MekJ3Oru00oXuvsmM+ufmrBSY+XKlVzz+2soKSmmpKSE0049jRNPrLpfuGXJyclh+PARDBh4KSUlJRQWnl2lP/AAo4a8xIhHe5FTPZtli9Zw88XPlrnu5//5ipmTF3Df3MvwEuele+fwv49XVGK08fDVypVce/0ISoqLKXGnyyldOP74E7huxA389qpfk2VZ7FW3Ln+4/o9RhxorOk+Fs2rVKi6/PFFpv614G926dee4446LOCpJBqvg7sOJlczygSOCxZnuHvosW7ytpOIdiOyGztVuiDqEtPDyhhFRh5AWqteoUqMmSCXJzsmq1LqHf9zyZtK+a38z7IRI603CNOX0BmaSuDtgH+A9M+uV6sBEREQknKrWlDMCOGJ7LYmZNQZeJdEZVkRERCRpwiQmWd9rulmFbv4nIiISH3Go6kiSMInJZDObAowPls8BXk5dSCIiIrI7MigvqTgxcferzOxs4Jig6B53fy61YYmIiEhVVGZiYmZvufuxZvY14LBjvNuBZlYCrAb+6u53VUKcIiIiUoYqcXdhdz82+LvLoefNrCEwA1BiIiIiEqEMykv2vBOru68CTkxeKCIiIlLV/aiRhdx9WbICERERkT1TJZpyREREJD1kUF6i8UhEREQkPlRjIiIikuYyqMJEiYmIiEi6y6Q+JmrKERERkdhQjYmIiEiay6AKEyUmIiIi6U5NOSIiIiIpoBoTERGRNJdBFSZKTERERNKdmnJEREREUkA1JiIiImkugypMlJiIiIiku0xKTNSUIyIiIrGhGhMREZE0l0mdX5WYiIiIpLkMykvUlCMiIiLxoRoTSTuvfHt91CGkhaVfros6hLRQsG+9qEMQ+dHUlCMiIiLxkT
l5iZpyREREJD5UYyIiIpLm1JQjIiIisaHERERERGIjg/IS9TERERGR+FCNiYiISJpTU46IiIjERgblJWrKERERkfhQjYmIiEiay6SmHNWYiIiIpDkzS9pUwX5qmtlMM/vQzD42sz8E5S3N7D0zW2hmT5hZ9aC8RrC8MHi8RUWvRYmJiIiIhPUtcJK7twd+ApxmZkcBfwFudfc2wBqgf7B+f2BNUH5rsF65lJiIiIikObPkTeXxhA3BYrVgcuAkYEJQPg7oEcyfFSwTPN7ZKqiWUWIiIiKS5pLZlGNmA81sdqlp4Pf2lW1mHwArgFeAT4G17r4tWGUJsE8wvw/wBUDw+DqgYXmvRZ1fRUREZAd3HwOMKefxYuAnZlYPeBY4MJn7V2IiIiKS5iyr8q/Kcfe1ZvY60BGoZ2Y5Qa3IvsDSYLWlQAGwxMxygDxgVXnPq6YcERGRNFdZfUzMrHFQU4KZ1QJOAeYDrwO9gtUuBJ4P5icGywSPT3V3L28fqjERERGRsJoB48wsm0TlxpPu/qKZ/Rt43MxuBP4F3Besfx/wsJktBFYD51S0AyUmIiIiaa6yBlhz97nAT3dRvgg4chflm4Heu7MPJSYiIiJpLoMGflUfExEREYkP1ZiIiIikuUy6V44SExERkTSXSYmJmnJEREQkNlRjIiIikuYyqMJEiYmIiEjay6DMRE05IiIiEhtVKjFZtmwZF110Id3P6M4ZZ3bn4Ycfijqk2Jo+fTpdu53OqaedytixY6MOJ1ZGXDuc4084lh6FZ/7gsQfHPUC7Qw5izZo1EUQWvb///Ub69OnKwIHn/+CxCRMe49RTO7Ju3VoApk6dwi9/+XN+8YvzGTp0AJ9+uqCyw42l4SOGc+xxx3DmWWdEHUrs6Tz1nWTeXThqVSoxycnJZtiwYbz4wos8Pv4JHhv/GAsXLow6rNgpLi7mxpv+xOh7xvDCxBeYNOklHadSepxVyD13//DGm8uWL2PGjBk0a9YsgqjioUuXbtx0060/KF+xoog5c2bSpEnTHWX5+c3461/vYvToRzn//Eu4/fabKzPU2Crs0YMxo8u8sasEdJ7aWWXdK6cyVKnEpHHjJhx00MEA5Obm0qpVa1asKIo4qviZN28uzQuaU1BQQPXq1StI0YEAACAASURBVDm9a1emvj416rBio0OHDuTl5f2g/JZb/sJvfvPbWPziiMohh/yUvfaq+4Py0aNvp3//QTud9A4++NAd6x544MF89dWKygoz1jp0OIK8vHpRhxF7Ok9lrtCJiZntZ2YnB/O1zGyv1IWVekuXLmX+/Pkcemj7qEOJnaKiFTRt9t0v26b5+awoUgJXnqlTX6NJkyYceMCBUYcSOzNmTKNRo8a0bt22zHUmT36BI47oWIlRSbrTeWpnlmVJm6IWKjExswHABGB0ULQv8Fw56w80s9lmNnvs2PhVSW7cuJErhl7ONVdfTZ06daIOR9LcN998w9h7xzB40JCoQ4mdzZs38/jj4+jXb0CZ63zwwftMmfIC/fsPqsTIRDJLJjXlhL1ceBCJuwa+B+DuC8ysSVkru/sYYAxA8bYS/7FBJtPWrVsZOvQKunc7g1NO6RJ1OLGUn9+E5cuW71heXlREk/z8CCOKty+++IKlS5fSs1chAEVFRfTu05PHxz9Bo0aNI44uWsuWLWH58mX86lcXALBy5UoGDbqIUaPuo0GDhixatJDbbhvJjTf+g7p1f9g8JlIWnacyV9jE5Ft337K97dzMcoBYJRxhuDvXXjeCVq1acdFFF0UdTmy1a3cIiz9fzJIlS2jSpAkvT5rELX/9a9Rhxdb+++/PtDff2rHc5dSTeeLxp6hfv36EUcVDy5ZtePLJSTuW+/Ur5I47HiAvrx4rViznj3+8mquuuo59920eYZSSjnSe2lkm9W0Lm5i8aWa/B2qZ2SnAZcALqQsrNebMmcPEiRPZf//9KTw78et26NChnHD8CRFHFi85OTkMHz6CAQMvpaSkhM
LCs2nbpuz+AVXNVcOuZNasmaxdu5bOnTtx2aDB9Dy7Z9RhxcLIkdcxd+4c1q1by/nnn8kFF1zKaaf98LJqgEcfvZ+vv17PnXf+DYDs7GzuvPOBygw3lq688rfMDN5fnU46kcGDBtOzZ6+ow4odnad2lkmJiblXXPFhZllAf6ALYMAU4F4PsXHcmnIk/ZXoLRXK0i/XRR1CWijYV1fASPJl51RuL9KXXvpP0k6M3bodGGmWE7bGpAfwkLtX7RFsREREYiiDKkxCXy58BvCJmT1sZt2DPiYiIiISA1Vu5Fd3vxhoAzwFnAt8amb3pjIwERERqXpC13y4+1Yze5nE1Ti1SDTvXJqqwERERCScONR0JEvYAdZON7MHgQVAT+BeoGm5G4mIiEilqIoDrPUDngB+4e7fpjAeERERqcJCJSbufq6Z5QOnBNVFM91dd9wSERGJgarYlNMbmAn0BvoA75mZRvwRERGJgUy6KidsU84I4IjttSRm1hh4lcSN/URERESSImxikvW9pptVhB8DRURERFIoBhUdSRM2MZlsZlOA8cFyX2BSOeuLiIhIJbHKHQE/pcJ2fr3KzHoCxwRFY9z92dSFJSIiIlXR7gyw9jTwdApjERERkT1QZZpyzOxrEiO9/uAhwN29bkqiEhERkdCMzMlMyk1M3H2vygpERERERHcJFhERSXeZU2GixERERCTdxWFgtGTRWCQiIiISG6oxERERSXMZVGGixERERCTdqSlHREREJAVUYyIiIpLmMqjCRImJiIhIusukphwlJiIiImkug/IS9TERERGR+FCNiYiISJpTU85uKCnZ1T0A5fsy6D2VcllZOlhhFOxbL+oQ0sK57e+IOoS0Mf7DIVGHIGXIpO8QNeWIiIhIbKgpR0REJM1lUo2JEhMREZE0Zxl0e2E15YiIiEhsqMZEREQkzakpR0RERGIjky4XVlOOiIiIhGJmBWb2upn928w+NrMrgvIGZvaKmS0I/tYPys3MRpnZQjOba2aHVbQPJSYiIiJpzix5UwW2Ab9194OAo4BBZnYQcDXwmru3BV4LlgFOB9oG00Dg7op2oMREREQkzZlZ0qbyuPsyd58TzH8NzAf2Ac4CxgWrjQN6BPNnAQ95wrtAPTNrVt4+lJiIiIjIDmY20Mxml5oGlrFeC+CnwHtAvrsvCx5aDuQH8/sAX5TabElQViZ1fhUREUlzyez76u5jgDHl78/qAE8DQ919femaFnd3M9vj+9EoMREREUlzlXlVjplVI5GUPOruzwTFRWbWzN2XBU01K4LypUBBqc33DcrKpKYcERERCcUSGdB9wHx3/0ephyYCFwbzFwLPlyrvF1ydcxSwrlSTzy6pxkRERCTdVV6FyTHABcA8M/sgKPs9cDPwpJn1BxYDfYLHJgFdgYXAJuDiinagxERERCTNVVZTjru/RdlpUOddrO/AoN3Zh5pyREREJDZUYyIiIpLmMmhEeiUmIiIi6U73yhERERFJAdWYiIiIpLnMqS9RYiIiIpL21JQjIiIikgKqMREREUlzGVRhosREREQk3akpR0RERCQFVGMiIiKS5jKowkSJiYiISLrLpMRETTkiIiISGxlfYzLi2uFMm/YmDRo04LlnJwIwZcpk7rr7/1i0aBHjxz9Bu4PbRRxl/Dz88MNMePop3J1evXrT74J+UYcUS8uWLeOaa67mq1WrMIM+vftwgY7VLk2fPp2RN/+Z4uISevXsxYABA6IOKVJ3/vNiNm/cQkmJU7ythGv6Pk7vy35G517tWL/mGwDG3zaDf03/jOxqWQy8vjOtD25CiTsPjnyTf89aGvEriJ7eU9/JpM6vGZ+Y9DirkPPOPZ/fD796R1mbtm257dZR/OGPN0QXWIwtWLCACU8/xePjn6BatWr84pcDOeGEE9iv+X5RhxY7OTnZDBs2jIMOOpiNGzfSq3dPOnY8mjZt2kQdWqwUFxdz401/4t6x95Gfn0/fvn3o1KlTlT9Of7j4ab
5eu3mnspce+hcvPDhnp7KTeyV+PF1Z+Ch1G9Ti9/ecxTV9H8e90kKNHb2ndpZBecnuN+WYWX0zOzQVwaRChw4dyMvL26msdavWtGzZMqKI4m/Rok859JBDqVWrFjk5OXTocASvvvpq1GHFUuPGTTjooIMByM3NpVWr1qxYURRxVPEzb95cmhc0p6CggOrVq3N6165MfX1q1GGljX1bN+Cj974AYP3qb9j49RZatcuPOKpo6T2VuUIlJmb2hpnVNbMGwBxgrJn9I7WhSVTatGnL+3PeZ+3atXzzzTdMnz6N5cuXRR1W7C1dupT58+dz6KHtow4ldoqKVtC0WdMdy03z81lRVMUTOHeGjy3k5ifPoXPv75qTTz2vPX995nx+9aeTya1bA4DP/vsVHTq1IivbaLxPXVod1IRGTfeKKvJY0HtqZ2aWtClqYZty8tx9vZldCjzk7teb2dyyVjazgcBAgLv+724uvbTqtvulo9atW9P/kksZMPBSatWqxYEHHEhWVnbUYcXaxo0buWLo5Vxz9dXUqVMn6nAkDVx7wVOsWbGRug1qMeLeQr5ctJp/PjGPCffMBHf6DulIv6uO4+5rX+X1Zz5m31YNuPnJc1n55Xr++8EySopLon4JIikRNjHJMbNmQB9geEUru/sYYAzA1i3FVbgVNH317NmTnj17AnDbbbeS37RpBVtUXVu3bmXo0Cvo3u0MTjmlS9ThxFJ+fhOWL1u+Y3l5URFN8qt2U8SaFRuBRNPMrFc/pc0hTZn//pc7Hn9twkf87q4zASgpdsb9ZdqOx/70SG++XLy2cgOOGb2nMlfYPiZ/BKYAC919lpm1AhakLiyJ2qpVqwD4ctmXvPraq3Tr2i3iiOLJ3bn2uhG0atWKiy66KOpwYqtdu0NY/PlilixZwpYtW3h50iQ6deoUdViRqVErh5q1q+2YP/To5ny+cBX1GtXesc6RJ7fhiwWJz2H1mjnUqJX4HXlIx+YUFztLP11d+YHHiN5TO6tyTTnu/hTwVKnlRUDPVAWVTFcNu5JZs2aydu1aOnfuxGWDBpOXl8fIP9/E6jWrueyyX3HggQcyZvTYqEONlaG/voK1a9eSk1ONEcNHULdu3ahDiqU5c+YwceJE9t9/fwrPLgRg6NChnHD8CRFHFi85OTkMHz6CAQMvpaSkhMLCs2nbpm3UYUUmr2FtrhzVHYDs7Czeeum/fPjWYgaP7EKLAxvjDiu/XM+YG15LrN+gFsPHFFJS4qxesYE7r54SZfixoPfUzmKQTySNeYjrzczsFuBG4BtgMnAo8Gt3f6SibdWUE04mvalSLQ4ZvWSOc9vfEXUIaWP8h0OiDiFtZOdkVeqJ6tNPVyXtu7Z164aRnmTDNuV0cff1QHfgM6ANcFWqghIREZGqKXTn1+BvN+Apd1+nX60iIiLxkElfyWETkxfN7D8kmnJ+ZWaNgc0VbCMiIiKyW8J2fr066Geyzt2LzWwTcFZqQxMREZEwjMypMgk78mtt4DLg7qBob6BDqoISERGR3WBJnCIWtvPrA8AW4OhgeSmJq3REREREkiZsYtLa3W8BtgK4+yZikVeJiIiIWfKmqIXt/LrFzGoBDmBmrYFvUxaViIiIhJZJfUzCJibXkxhYrcDMHgWOAS5KVVAiIiJSNYW9KucVM5sDHEWiCecKd/8qpZGJiIhIOJlTYRK6xgSgJrAm2OYgM8Pdp1WwjYiIiKRYBuUl4RITM/sL0Bf4GCgJih1QYiIiIiJJE7bGpAdwgLurw6uIiEjMZNJtYsImJouAauhKHBERkfjJnLwkdGKyCfjAzF6jVHLi7penJCoREREJLYPyktCJycRgEhEREUmZsJcLj0t1ICIiIrJnqkwfEzObRzDa6664+6FJj0hERESqrIpqTLoHfwcFfx8O/v6cchIWERERkT1RbmLi7osBzOwUd/9pqYd+F4wEe3UqgxMREZGKZVBLTui7C5uZHVNq4ejd2FZERERSyMySNkUt7F
U5/YH7zSyPxFVJa4BLUhaViIiIVElhr8p5H2gfJCa4+7qURiUiIiJVUuib+JlZN+BgoOb2qh53/2OK4hIREZGQYtACkzSh+omY2T0kbuI3hERTTm9gvxTGJSIiIlVQ2A6sR7t7P2CNu/8B6Ajsn7qwREREJCxL4r+ohW3K2Rz83WRmewOrgWZhNszKiv5FpoOSEg0LE1YmVVlK9MZ/OCTqENLG5s1bow4hbeTWqVG5O8yg82LYxOQFM6sH/BWYQ2JwtbEpi0pERESqpLBNOf8Bit39aeD/gHeB51IWlYiIiIRmlryp4n3Z/Wa2wsw+KlXWwMxeMbMFwd/6QbmZ2SgzW2hmc83ssIqeP2xicq27f21mxwInAfcCd4fcVkRERFLIkjiF8CBw2vfKrgZec/e2wGt8NzL86UDbYBpIiNwhbGJSHPztBox195eA6iG3FRERkQzh7tNI9DUt7SxgXDA/DuhRqvwhT3gXqGdm5fZRDZuYLDWz0SQuGZ5kZjV2Y1sRERFJpSS25ZjZQDObXWoaGCKCfHdfFswvB/KD+X2AL0qttyQoK1PYzq99SFTb/M3d1wbZzlUhtxUREZEUSuZFOe4+BhjzI7Z3M9vjS03DDkm/CXim1PIyYFnZW4iIiEgVUmRmzdx9WVB5sSIoXwoUlFpv36CsTGqOERERSXOVeVVOGSYCFwbzFwLPlyrvF1ydcxSwrlSTzy6FvleOiIiIxFQljjxpZuOBE4FGZrYEuB64GXjSzPoDi0l0AQGYBHQFFgKbgIsrfH731I44WrxNQ5qGoZFfw9NowiLR0Miv4eXWqVGpJ6pVX21M2pdIw0a5kZ5kVWMiIiKS5jLp55oSExERkTSXSfcQU+dXERERiQ3VmIiIiKS9zKkyUWIiIiKS5tSUIyIiIpICSkxEREQkNtSUIyIikubUlCMiIiKSAqoxERERSXuZU2WixERERCTNqSlHREREJAWUmIiIiEhsqClHREQk3akpR0RERCT5VGMiIiKS5iyDqkxUYyIiIiKxUeUSk+nTp9O12+mcetqpjB07NupwYmXEtcM5/oRj6VF45o6yO+4YReHZPejZq5ABAy9lxYoVEUYYP8NHDOfY447hzLPOiDqU2NNnL5xvv/2Wvn37UFjYgzPO7M4dd94RdUixU1xczLnn9eHyKwYDMHPme5x3Xh969ynkuuuGs23btogjlB+jSiUmxcXF3HjTnxh9zxhemPgCkya9xMKFC6MOKzZ6nFXIPXeP2ans4osv4dlnnuPpCc9ywgkncPc9d0UUXTwV9ujBmNFjKl6xitNnL7zq1atz//0P8Oyzz/HM08/y1ltv8eGHH0QdVqyMH/8oLVu0BKCkpITrbxjByJG38NSTz9Ks2d68+OLEiCOsfGbJm6JWpRKTefPm0rygOQUFBVSvXp3Tu3Zl6utTow4rNjp06EBeXt5OZXXq1Nkx/80332BxeNfGSIcOR5CXVy/qMGJPn73wzIzc3FwAtm3bxrZtW+PxbRETRUXLmf7WNHr0OBuAdevWUi2nGvvt1wKAnx11FK9NfTXCCOXHCpWYmFm+md1nZi8HyweZWf/UhpZ8RUUraNqs6Y7lpvn5rCgqijCi9HD7qNvofPJJvPTSiwweNCTqcCQN6bO3e4qLiyk8u5BjjzuWozseTftD20cdUmz87e+3cMUVvyErK/H1Va9efbYVF/Pvf38MwGuvvkLR8uVRhig/UtgakweBKcDewfInwNCyVjazgWY228xmjx2rau50d8XlQ3nt1al069adx8Y/GnU4IhkvOzubZ595ltenvs68efNYsOCTqEOKhWnT3qRB/QYc9P8O2lFmZowceQt/+/stXNDvPGrn5pKVnR1hlBHJoLacsJcLN3L3J83sGgB332ZmxWWt7O5jgDEAxdtK/MeHmRz5+U1Yvuy7THp5URFN8vMjjCi9dO/WnV9d9kvVmshu02dvz9StW5cjjzyS6W+9Rdu2+0cdTuQ+/PAD3pz2Bm
+9/RZbtnzLxg0bGT7iGm66cST33zcOgHfemcHnixdHHGnliz6dSJ6wNSYbzawh4ABmdhSwLmVRpUi7doew+PPFLFmyhC1btvDypEl06tQp6rBibfHiz3bMT506lZYtW0UXjKQtffbCW716NevXrwdg8+bNzHjnHVq1bBlxVPEwZMgVTH75VV56cTIj/3wLHY44kptuHMnq1asA2LJlCw+Ou5+ePXtHHKn8GGFrTH4DTARam9nbQGOgV8qiSpGcnByGDx/BgIGXUlJSQmHh2bRt0zbqsGLjqmFXMmvWTNauXUvnzp24bNBgpk+fxmef/Q+zLPbee2+uu/b6qMOMlSuv/C0zg2PW6aQTGTxoMD17pt1HI+X02Qtv5cqVXPP7aygpKaakpITTTj2NE09UEleecQ89yPTp03AvoVevPhx55M+iDqnyZVCVibmHa2kxsxzgABIv/7/uvjXMdnFqyomzEh2m0LKyMugTKJJGNm8OddoXILdOjUo9UW3a8G3SvkRqV3Ls3xf2qpzeQC13/xjoATxhZoelNDIRERGpcsL2MbnW3b82s2OBzsB9wN2pC0tERERCy6CrcsImJtuvwOkGjHX3l4DqqQlJREREqqqwiclSMxsN9AUmmVmN3dhWREREJJSwyUUfEgOsnerua4EGwFUpi0pERERCsyROUSv3cmEzq+vu64GawBtBWQPgW2B2yqMTERGRisUho0iSisYxeQzoDrxPYnC10i/dAY22JSIiEjHLoMyk3MTE3btb4nayJ7j755UUk4iIiFRRFfYx8cQIbC9VQiwiIiKyJzKok0nYzq9zzOyIlEYiIiIieySD8pLQ98r5GXC+mS0GNpKI3d390JRFJiIiIlVO2MTk1JRGISIiInsuDlUdSRIqMXH3xcG9cY4lcTXO2+4+J6WRiYiISEiZk5mEvYnfdcA4oCHQCHjAzEakMjARERGpeixx0U0FK5n9F2jv7puD5VrAB+5+QEXbFm8rSdqtmDNZiQ5TaFlZmfPLQCSdbN68NeoQ0kZunRqVeqLasnlb0r5EqtfMifQkG7aPyZckRn/dHCzXAJamJCIRERHZPRn0ey1sYrIO+NjMXiHRx+QUYKaZjQJw98tTFJ+IiIhUIWETk2eDabs3kh+KiIiI7IkMqjCpODExs2ygi7ufXwnxiIiIyO6yzElNwgxJXwzsZ2bVKyEeERERqcLCNuUsAt42s4kkRn4FwN3/kZKoREREpEoKm5h8GkxZwF6pC0dERER2Vwa15IQbx+TH0Dgm4Wgck/A0jolINDSOSXiVPY7Jtq3FSfsSyamWHf9xTMzsdRKXCe/E3U9KekQiIiJSZYVtyrmy1HxNoCewLfnhiIiIyO6ySmzLMbPTgNuBbOBed785qc+/p005ZjbT3Y+saD015YSjppzw1JQjEg015YRX2U05yfyuzc4p+yQbDCHyCYmBVpcAs4Bz3f3fydp/2KacBqUWs4AOQF6yghAREZG0cCSw0N0XAZjZ48BZQOUmJsD7JPqYGLAV+AzoH2bD8jKvqJjZQHcfE3UcpWVHHUAZ4nis4kjHKTwdq3DieJxy69SIOoQfiONxikIyv2vNbCAwsFTRmFLHeB/gi1KPLQF+lqx9Q4gB1gK/A37i7i2Bh0mMZbIpmYFUsoEVryIBHatwdJzC07EKR8cpHB2nJHP3Me7eodRUqYlf2MRkhLuvN7NjgZOAe4G7UxeWiIiIxNBSoKDU8r5BWdKETUyKg7/dgLHu/hKgIepFRESqlllAWzNrGdyq5hxgYjJ3EDYxWWpmo4G+wCQzq7Eb28ZRlW+P3A06VuHoOIWnYxWOjlM4Ok6VyN23AYOBKcB84El3/ziZ+wh1ubCZ1QZOA+a5+wIzawYc4u7/TGYwIiIiUrWlfEh6ERERkbDSuTlGREREMowSkyrMzC43s/lm9mjUsaQTM5sRdQxxZGYboo4hHZlZCzP7KOo4MpGZTTKzelHHIbtHTTkhWeJGBObuJVHHki
xm9h/gZHdf8iOeIyfoDCVVnJltcPc6UceRbsysBfCiu7eLOJTYC3u+ycTzdVWS9jUmZvacmb1vZh8Ho9VhZhvM7CYz+9DM3jWz/KC8dbA8z8xuLP0Lz8yuMrNZZjbXzP4QlLUws/+a2UPAR+x87XZaM7N7gFbAy2Y23MzuN7OZZvYvMzsrWKeFmU03sznBdHRQfmJQPpEkDkOcLoL3l5nZX83so+D91Dd47CEz61Fq3Ue3H8+qopxj87iZdSu13oNm1svMsoP1t3/+fhFd9HvOzHLN7KXgvPORmfU1s+uC1/WRmY0JvjAxs8OD9T4EBpV6jovM7Bkzm2xmC8zsllKPdTGzd4LP4lNmVicov9nM/h0cu78FZb2DfX5oZtMq+VBUqIxj9ZmZNQoe72BmbwTzN5jZw2b2NvBwcIyeN7M3gmN0fbDeD87X259zV/sLtjnczN4MvkOmWOLCDomau6f1BDQI/tYi8WZsSGL4/DOC8ltIDBAH8CKJmw0B/BLYEMx3IXHJmZFI1l4EjgdaACXAUVG/zhQdu8+ARsCfgZ8HZfVI3KApF6gN1AzK2wKzg/kTSYz+2zLq1xDRcdtA4g7br5C4m0A+8DnQDDgBeC5YLw/4H5ATdcyVdVyCv2Udm0JgXLBOdRLDWtciMXLn9s9oDWB2Or63gtc9ttRy3vbzU7D8cKnz0lzg+GD+r8BHwfxFwKJg25rAYhI/iBoB04DcYL3fAdcF57v/8l3td73g7zxgn9JlcZrKOFafAY2C5Q7AG8H8DSRui1Kr1DFaFrz27ef9Drs6X5c6x+1qf9WAGUDjoKwvcH/Ux0aTp3+NCXB58KvjXRIf4LbAFhLJBSTe0C2C+Y7AU8H8Y6Weo0sw/QuYAxwYPA/AYnd/N1XBx0QX4Goz+wB4g8QJsTmJD+5YM5tH4rgdVGqbme7+v8oONEaOBca7e7G7FwFvAke4+5skBh9qDJwLPO1Vr6lrl8cGeBnoZIlxkE4Hprn7NyTef/2C9997JL5w2u76qWNtHnCKmf3FzI5z93UkXu97wWfoJOBgS/R5qOfu22syHv7e87zm7uvcfTOJGsn9gKNIfP7eDo7ThUH5OmAzcJ+Znc13twp5G3jQzAYQz1tx7epYlWdi8F7Z7hV3XxWUPUPiPQdln693tb8DgHbAK8ExHUFiFFOJWNib+MWSmZ0InAx0dPdNQdVfTWCrBykwiVFrK3qdBox099Hfe/4WJGoGMp0BPd39vzsVmt0AFAHtSdQkbS71cFU4LnvqIeDnJEZEvDjiWGLD3TcHn9FTSfw6fTx4yIAh7j4lqtiSwd0/MbPDgK7AjWb2Golmmg7u/kXweaoZ4qm+LTW//fxlJL6Mz/3+ymZ2JNAZ6EVi4KuT3P2XZvYzEqN1v29mh7v7qh/x8pKqjGO1je+6F3z/OH3/fPP9zpFexnrl7e9Z4GN377iHL0NSJN1rTPKANUFSciCJXxXleZdElR4kvjS2mwJcUqrNdh8za5L0aONrCjCkVPv3T4PyPGCZJzqQXUA8f3lFZTrQN+gf0ZhE09/M4LEHgaEA7l7l+uBQ/rF5gkSydhwwOSibAvzKzKoBmNn+ZpZbyTH/aGa2N7DJ3R8h0TxzWPDQV8G5pReAu68F1lri3mMA54d4+neBY8ysTbCv3OA41QHy3H0S8GsSPyIws9bu/p67XwesJGb948o4Vp8Bhwer9Cxj0+1OMbMGZlYL6EGihmh39/dfoLGZdQzWqWZmB+/hS5IkSusaExIntl+a2XwSb7KKmlyGAo+Y2fBg23UA7v5PM/t/wDvBd/MGEr94i8t6ogzzJ+A2YK6ZZZHoF9EduAt42sz6kTheqiVJcBK/tjoCHwbLw9x9OYC7FwXvyeeiCzFSZR4b4J8kmi6ed/ctQdm9JJpb/397d4wSMRAFYPh/ndpYeAArSxv1CB7BwlLtBCuvYLmCIrZaLVhZeADBWmQrDyFsI1gICj6LGTsJG1Q2cf+vTAYymRTz8l
5mMqrB8Zgy2fTNKjCIiA/gHdin3Mcj8ET5x8iXXeAyIpIyJo0ycxwRO8BVLYVBKT28ADcRMUfJqhzWc4OIzim/ugAAALdJREFUWKnHbinPoku+G6t5SknqiFJSbnIPXFNKL8PMfKgZ7omvl5lvEbEFnEXEImU+PAV+dXt1tTdTy4WjbK3/mpkZEduUD2FnasWEfiYiloBRZi43tFmg1LTXJqidS2qhBmgbmXkw7b7ob/Q9Y9LWOnBe38qegb0p90c9UtPBd8BxQ5tN4AI4MSiRpPZmKmMiSZK6re8fv0qSpH/EwESSJHWGgYkkSeoMAxNJktQZBiaSJKkzPgFrstqlMbHCggAAAABJRU5ErkJggg==\n",
            "text/plain": [
              "<Figure size 720x504 with 2 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "QKh_bJxtlhkW",
        "colab_type": "text"
      },
      "source": [
        "From the above plot we can see that the most confused classes are 'joy' and 'love', which seems obvious as these two emotions are really close. We can say the same thing about 'surprise' and 'anger' as well. So our model is doing pretty well."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "16TiclmeX1xE",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "vZ-YLmJyg64T",
        "colab_type": "text"
      },
      "source": [
        "## SWAG\n",
        "\n",
        "Now let's try a more challenging task and see how the model performs.\n",
        "\n",
        "SWAG is a natural language inference and commonsense reasoning task proposed in this [paper](https://arxiv.org/pdf/1808.05326.pdf).\n",
        "\n",
        "The basic task is that a model is\n",
        "given a context **c = (s, n)**: a complete sentence\n",
        "**s** and a noun phrase **n** that begins a second sentence, as well as a list of possible verb phrase sentence endings **V**. The model must then\n",
        "select the most appropriate verb phrase **v** in **V**. For example:\n",
        "\n",
        "On stage, a woman takes a seat at the piano. She\n",
        "\n",
        "a) sits on a bench as her sister plays with the doll.\n",
        "\n",
        "b) smiles with someone as the music plays.\n",
        "\n",
        "c) is in the crowd, watching the dancers.\n",
        "\n",
        "**d) nervously sets her fingers on the keys.**\n",
        "\n",
        "The correct answer is bolded. Given the above example, the model should select **nervously sets her fingers on the keys** as the most appropriate verb phrase.\n",
        "\n",
        "To frame this task in a text-2-text setting, each example is processed as shown below.\n",
        "\n",
        "context: context_text options: 1: option_1 2: option_2 3: option_3 4: option_4\n",
        "\n",
        "If the actual label is 1, then the model is asked to predict the text '1'. Here's how the above example will be processed:\n",
        "\n",
        "**Input**\n",
        "\n",
        "context: On stage, a woman takes a seat at the piano. She  options: 1: sits on a bench as her sister plays with the doll. 2: smiles with someone as the music plays. 3: is in the crowd, watching the dancers. 4: nervously sets her fingers on the keys.\n",
        "\n",
        "**Target**\n",
        "\n",
        "4\n",
        "\n",
        "This is just one possible way to process these examples. There are various other ways we can formulate this problem in a text-2-text setting, but that's for later."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hOxk-ZoJmamm",
        "colab_type": "text"
      },
      "source": [
        "### Dataset"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yeHfgOhThLPj",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import csv\n",
        "from dataclasses import dataclass\n",
        "\n",
        "from enum import Enum\n",
        "from typing import List, Optional\n",
        "from transformers import PreTrainedTokenizer"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3DulV7U5hik7",
        "colab_type": "code",
        "outputId": "880c611b-d11c-4620-9d75-0bcfa423c1ff",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 386
        }
      },
      "source": [
        "# Download the SWAG train/val CSVs straight into swag_data/.\n",
        "# `mkdir -p` and `wget -nc` keep this cell safe to re-run: an existing directory is not an\n",
        "# error, and files that are already present are not downloaded again.\n",
        "!mkdir -p swag_data\n",
        "!wget -nc -P swag_data https://raw.githubusercontent.com/rowanz/swagaf/master/data/train.csv\n",
        "!wget -nc -P swag_data https://raw.githubusercontent.com/rowanz/swagaf/master/data/val.csv"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "--2020-05-09 15:06:34--  https://raw.githubusercontent.com/rowanz/swagaf/master/data/train.csv\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 28243333 (27M) [text/plain]\n",
            "Saving to: ‘train.csv’\n",
            "\n",
            "train.csv           100%[===================>]  26.93M  35.9MB/s    in 0.8s    \n",
            "\n",
            "2020-05-09 15:06:35 (35.9 MB/s) - ‘train.csv’ saved [28243333/28243333]\n",
            "\n",
            "--2020-05-09 15:06:38--  https://raw.githubusercontent.com/rowanz/swagaf/master/data/val.csv\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 7893588 (7.5M) [text/plain]\n",
            "Saving to: ‘val.csv’\n",
            "\n",
            "val.csv             100%[===================>]   7.53M  17.5MB/s    in 0.4s    \n",
            "\n",
            "2020-05-09 15:06:39 (17.5 MB/s) - ‘val.csv’ saved [7893588/7893588]\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Tllm6irZg8IO",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# below code is adapted from https://github.com/huggingface/transformers/blob/master/examples/multiple-choice/utils_multiple_choice.py\n",
        "\n",
        "@dataclass(frozen=True)\n",
        "class InputExample:\n",
        "    \"\"\"\n",
        "    A single training/test example for multiple choice\n",
        "    Args:\n",
        "        example_id: Unique id for the example.\n",
        "        question: string. The untokenized text of the second sequence (question).\n",
        "        contexts: list of str. The untokenized text of the first sequence (context of corresponding question).\n",
        "        endings: list of str. multiple choice's options. Its length must be equal to contexts' length.\n",
        "        label: (Optional) string. The label of the example. This should be\n",
        "        specified for train and dev examples, but not for test examples.\n",
        "    \"\"\"\n",
        "\n",
        "    example_id: str\n",
        "    context: str\n",
        "    endings: List[str]\n",
        "    label: Optional[str]\n",
        "\n",
        "class Split(Enum):\n",
        "    train = \"train\"\n",
        "    dev = \"dev\"\n",
        "    test = \"test\"\n",
        "\n",
        "class DataProcessor:\n",
        "    \"\"\"Base class for data converters for multiple choice data sets.\"\"\"\n",
        "\n",
        "    def get_train_examples(self, data_dir):\n",
        "        \"\"\"Gets a collection of `InputExample`s for the train set.\"\"\"\n",
        "        raise NotImplementedError()\n",
        "\n",
        "    def get_dev_examples(self, data_dir):\n",
        "        \"\"\"Gets a collection of `InputExample`s for the dev set.\"\"\"\n",
        "        raise NotImplementedError()\n",
        "\n",
        "    def get_test_examples(self, data_dir):\n",
        "        \"\"\"Gets a collection of `InputExample`s for the test set.\"\"\"\n",
        "        raise NotImplementedError()\n",
        "\n",
        "    def get_labels(self):\n",
        "        \"\"\"Gets the list of labels for this data set.\"\"\"\n",
        "        raise NotImplementedError()\n",
        "\n",
        "class SwagProcessor(DataProcessor):\n",
        "    \"\"\"Processor for the SWAG data set.\"\"\"\n",
        "\n",
        "    def get_train_examples(self, data_dir):\n",
        "        \"\"\"See base class.\"\"\"\n",
        "        logger.info(\"LOOKING AT {} train\".format(data_dir))\n",
        "        return self._create_examples(self._read_csv(os.path.join(data_dir, \"train.csv\")), \"train\")\n",
        "\n",
        "    def get_dev_examples(self, data_dir):\n",
        "        \"\"\"See base class.\"\"\"\n",
        "        logger.info(\"LOOKING AT {} dev\".format(data_dir))\n",
        "        return self._create_examples(self._read_csv(os.path.join(data_dir, \"val.csv\")), \"dev\")\n",
        "\n",
        "    def get_test_examples(self, data_dir):\n",
        "        \"\"\"See base class.\"\"\"\n",
        "        logger.info(\"LOOKING AT {} dev\".format(data_dir))\n",
        "        raise ValueError(\n",
        "            \"For swag testing, the input file does not contain a label column. It can not be tested in current code\"\n",
        "            \"setting!\"\n",
        "        )\n",
        "        return self._create_examples(self._read_csv(os.path.join(data_dir, \"test.csv\")), \"test\")\n",
        "\n",
        "    def get_labels(self):\n",
        "        \"\"\"See base class.\"\"\"\n",
        "        return [\"0\", \"1\", \"2\", \"3\"]\n",
        "\n",
        "    def _read_csv(self, input_file):\n",
        "        with open(input_file, \"r\", encoding=\"utf-8\") as f:\n",
        "            return list(csv.reader(f))\n",
        "\n",
        "    def _create_examples(self, lines: List[List[str]], type: str):\n",
        "        \"\"\"Creates examples for the training and dev sets.\"\"\"\n",
        "        if type == \"train\" and lines[0][-1] != \"label\":\n",
        "            raise ValueError(\"For training, the input file must contain a label column.\")\n",
        "\n",
        "        examples = [\n",
        "            InputExample(\n",
        "                example_id=line[2],\n",
        "                # common beginning of each\n",
        "                # choice is stored in \"sent2\".\n",
        "                context=line[3],\n",
        "                endings=[line[7], line[8], line[9], line[10]],\n",
        "                label=line[11],\n",
        "            )\n",
        "            for line in lines[1:]  # we skip the line with the column names\n",
        "        ]\n",
        "\n",
        "        return examples"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-OXxGvqZjC9L",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "class SwagDataset(Dataset):\n",
        "  def __init__(self, tokenizer, data_dir, type_path,  max_len=512):\n",
        "    self.data_dir = data_dir\n",
        "    self.type_path = type_path\n",
        "    self.max_len = max_len\n",
        "    self.tokenizer = tokenizer\n",
        "    self.inputs = []\n",
        "    self.targets = []\n",
        "\n",
        "    self.proc = SwagProcessor()\n",
        "\n",
        "    self._build()\n",
        "  \n",
        "  def __getitem__(self, index):\n",
        "    source_ids = self.inputs[index][\"input_ids\"].squeeze()\n",
        "    target_ids = self.targets[index][\"input_ids\"].squeeze()\n",
        "\n",
        "    src_mask    = self.inputs[index][\"attention_mask\"].squeeze()  # might need to squeeze\n",
        "    target_mask = self.targets[index][\"attention_mask\"].squeeze()  # might need to squeeze\n",
        "\n",
        "    return {\"source_ids\": source_ids, \"source_mask\": src_mask, \"target_ids\": target_ids, \"target_mask\": target_mask}\n",
        "  \n",
        "  def __len__(self):\n",
        "    return len(self.inputs)\n",
        "  \n",
        "  def _build(self):\n",
        "    if self.type_path == 'train':\n",
        "      examples = self.proc.get_train_examples(self.data_dir)\n",
        "    else:\n",
        "      examples = self.proc.get_dev_examples(self.data_dir)\n",
        "    \n",
        "    for example in examples:\n",
        "      self._create_features(example)\n",
        "  \n",
        "  def _create_features(self, example):\n",
        "    input_ = example.context\n",
        "    options = ['%s: %s' % (i, option) for i, option in zip('1234', example.endings)]\n",
        "    options = \" \".join(options)\n",
        "    input_ = \"context: %s  options: %s </s>\" % (input_, options)\n",
        "    target = \"%s </s>\" % str(int(example.label) + 1)\n",
        "\n",
        "    # tokenize inputs\n",
        "    tokenized_inputs = self.tokenizer.batch_encode_plus(\n",
        "        [input_], max_length=self.max_len, pad_to_max_length=True, return_tensors=\"pt\"\n",
        "    )\n",
        "    # tokenize targets\n",
        "    tokenized_targets = self.tokenizer.batch_encode_plus(\n",
        "        [target], max_length=2, pad_to_max_length=True, return_tensors=\"pt\"\n",
        "    )\n",
        "\n",
        "    self.inputs.append(tokenized_inputs)\n",
        "    self.targets.append(tokenized_targets)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "oKqFMTku3sDC",
        "colab_type": "code",
        "outputId": "97ce9f8a-4b75-4d95-ba04-fae101f8db82",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 186,
          "referenced_widgets": [
            "78b1b91a08214461b74fb1e143247d1e",
            "902a509471004d2691d807c4990fccd2",
            "74ec15497e1743a4af6be12e3bc1487d",
            "a70b457d9379403f9fac247de68bb8e3",
            "28f9d9aa0ece4831b0f9e412d8a88f8d",
            "7640680e1006492da75d873726567fed",
            "1090e3e017564a2281c60fb53a901c75",
            "9df2679ba627444e9b76bd2ff0ddc657"
          ]
        }
      },
      "source": [
        "tokenizer = T5Tokenizer.from_pretrained('t5-base')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:filelock:Lock 140245777042344 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n",
            "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpv2ybakmg\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "78b1b91a08214461b74fb1e143247d1e",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n",
            "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n",
            "INFO:filelock:Lock 140245777042344 released on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n",
            "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PIUiU7zSpbb3",
        "colab_type": "code",
        "outputId": "328b5f15-fe96-43ce-99e9-5d4233a7e97a",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 50
        }
      },
      "source": [
        "dataset = SwagDataset(tokenizer, data_dir='swag_data', type_path='val')\n",
        "len(dataset)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:LOOKING AT swag_data dev\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "20006"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 14
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zxXGbCzB37HG",
        "colab_type": "code",
        "outputId": "8fbda79c-7be7-4d5f-8d5f-7b986a1c374b",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 70
        }
      },
      "source": [
        "data = dataset[69]\n",
        "print(tokenizer.decode(data['source_ids']))\n",
        "print(tokenizer.decode(data['target_ids']))"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "context: A little girl plays softly the drums holding two sticks while she is singing on a microphone. The, the girl options: 1: take in the greeting and an asian girl followed by two people standing on stage. 2: holds the microphone up and begins to girl dance an entire time. 3: claps the girls hands anxiously. 4: plays more fast the drums.\n",
            "4\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "aVfmE4O3Ku7H",
        "colab_type": "text"
      },
      "source": [
        "### Train"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "DDPxWUY86llx",
        "colab": {}
      },
      "source": [
        "!mkdir -p t5_swag"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "outputId": "fe4e58ab-6916-45f9-f742-797d87ad1ef4",
        "id": "PrWtMjcj6lmA",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        }
      },
      "source": [
        "args_dict.update({'data_dir': 'swag_data', 'output_dir': 't5_swag', 'num_train_epochs': 3})\n",
        "args = argparse.Namespace(**args_dict)\n",
        "print(args_dict)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "{'data_dir': 'swag_data', 'output_dir': 't5_swag', 'model_name_or_path': 't5-base', 'tokenizer_name_or_path': 't5-base', 'max_seq_length': 512, 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'warmup_steps': 0, 'train_batch_size': 8, 'eval_batch_size': 8, 'num_train_epochs': 3, 'gradient_accumulation_steps': 16, 'n_gpu': 1, 'early_stop_callback': False, 'fp_16': False, 'opt_level': 'O1', 'max_grad_norm': 1.0, 'seed': 42}\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "2Ojz3THj6lmK",
        "colab": {}
      },
      "source": [
        "checkpoint_callback = pl.callbacks.ModelCheckpoint(\n",
        "    filepath=args.output_dir, prefix=\"checkpoint\", monitor=\"val_loss\", mode=\"min\", save_top_k=5\n",
        ")\n",
        "\n",
        "train_params = dict(\n",
        "    accumulate_grad_batches=args.gradient_accumulation_steps,\n",
        "    gpus=args.n_gpu,\n",
        "    max_epochs=args.num_train_epochs,\n",
        "    early_stop_callback=False,\n",
        "    precision= 16 if args.fp_16 else 32,\n",
        "    amp_level=args.opt_level,\n",
        "    gradient_clip_val=args.max_grad_norm,\n",
        "    checkpoint_callback=checkpoint_callback,\n",
        "    callbacks=[LoggingCallback()],\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "Kk0x0Nql6lmQ",
        "colab": {}
      },
      "source": [
        "def get_dataset(tokenizer, type_path, args):\n",
        "  return SwagDataset(tokenizer=tokenizer, data_dir=args.data_dir, type_path=type_path,  max_len=args.max_seq_length)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "outputId": "94aa8d13-9d11-4fa9-979f-e3bbf15bb639",
        "id": "XDFGzzpQ6lmU",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "5c7427d7db844b9691d30cf2de1efc17",
            "bb0df1833ee3489da5c2a9c7b1306cc6",
            "3d2817812b6f475a8c838fd14646469a",
            "9d0f0c946790477fb8bc8bac64dfd7de",
            "8254b8062d5e4280bea46f8bc444c5db",
            "ab5f07ab5c574148a0062eb7f1ce5bcd",
            "47fdc2009efc443392ecd182996fcca9",
            "9b705e83fea84cbf912e33d6342be721",
            "e8e8ea6199df43019930ac7b557c46a5",
            "0566f29b017f47f399d7579d7929e046",
            "932309f0a40b46659c0cac7cc37fdc05",
            "da3665141bd44a24a5b5c9f36d4a9c52",
            "5c98e3a5b6a6403a936a725f4c30cdd3",
            "8da2b560fa9348098a2a7f09967d5f5f",
            "7e37cac227014717987922341f8099fe",
            "b95f98f98a76434591f90d41b43e39ba"
          ]
        }
      },
      "source": [
        "model = T5FineTuner(args)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:filelock:Lock 140242832534944 acquired on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\n",
            "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpwv74k3ig\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "5c7427d7db844b9691d30cf2de1efc17",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1199.0, style=ProgressStyle(description…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json in cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n",
            "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n",
            "INFO:filelock:Lock 140242832534944 released on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\n",
            "INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n",
            "INFO:transformers.configuration_utils:Model config T5Config {\n",
            "  \"architectures\": [\n",
            "    \"T5WithLMHeadModel\"\n",
            "  ],\n",
            "  \"d_ff\": 3072,\n",
            "  \"d_kv\": 64,\n",
            "  \"d_model\": 768,\n",
            "  \"decoder_start_token_id\": 0,\n",
            "  \"dropout_rate\": 0.1,\n",
            "  \"eos_token_id\": 1,\n",
            "  \"initializer_factor\": 1.0,\n",
            "  \"is_encoder_decoder\": true,\n",
            "  \"layer_norm_epsilon\": 1e-06,\n",
            "  \"model_type\": \"t5\",\n",
            "  \"n_positions\": 512,\n",
            "  \"num_heads\": 12,\n",
            "  \"num_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": 0,\n",
            "  \"relative_attention_num_buckets\": 32,\n",
            "  \"task_specific_params\": {\n",
            "    \"summarization\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"length_penalty\": 2.0,\n",
            "      \"max_length\": 200,\n",
            "      \"min_length\": 30,\n",
            "      \"no_repeat_ngram_size\": 3,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"summarize: \"\n",
            "    },\n",
            "    \"translation_en_to_de\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"max_length\": 300,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"translate English to German: \"\n",
            "    },\n",
            "    \"translation_en_to_fr\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"max_length\": 300,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"translate English to French: \"\n",
            "    },\n",
            "    \"translation_en_to_ro\": {\n",
            "      \"early_stopping\": true,\n",
            "      \"max_length\": 300,\n",
            "      \"num_beams\": 4,\n",
            "      \"prefix\": \"translate English to Romanian: \"\n",
            "    }\n",
            "  },\n",
            "  \"vocab_size\": 32128\n",
            "}\n",
            "\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:filelock:Lock 140242971659568 acquired on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\n",
            "INFO:transformers.file_utils:https://cdn.huggingface.co/t5-base-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp5pcfx_u3\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "e8e8ea6199df43019930ac7b557c46a5",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891691430.0, style=ProgressStyle(descri…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.file_utils:storing https://cdn.huggingface.co/t5-base-pytorch_model.bin in cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n",
            "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n",
            "INFO:filelock:Lock 140242971659568 released on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\n",
            "INFO:transformers.modeling_utils:loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:transformers.modeling_utils:Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\n",
            "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "outputId": "57300f1a-14a8-4e26-8dac-9238e34741c0",
        "id": "1sQVILFo63Eb",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 50
        }
      },
      "source": [
        "trainer = pl.Trainer(**train_params)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:lightning:GPU available: True, used: True\n",
            "INFO:lightning:CUDA_VISIBLE_DEVICES: [0]\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "STkqK5nC64YP",
        "colab_type": "code",
        "outputId": "cb613d72-009f-44eb-acd8-b9c3dd44b0cb",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "8e79d03deee94b299431330441bd64c8",
            "510043ffee634f86b89ec3fc060a74ea",
            "e86c5fbd48ce4215a0df353122183982",
            "bfc3a5a3cf2e49868053db6f1ef7785d",
            "361a2f79ed89495894d0b09a709f8f32",
            "f7e53d55f0234627a3b9f2c90eb8682f",
            "3584c01b0c5e47dfa373bae29461e94a",
            "cfd9db6f31474a8189e741bf8fdad6a9",
            "68705cee3df5458fb5145046337d925c",
            "4cf1613d58bd450780ac95c994686985",
            "3ee5f7cf56394175900ebb14ae0b5f9e",
            "9f054dcf926c45459b7aa728493571a0",
            "b52599dda9d94c83891d1c42c5f557e0",
            "a1cf907a3bcc4177b1d5dd9edbf30c20",
            "82b29ceeb21c417782e9e29a81eb47ea",
            "886260804ffd4e11bc93fb6e098111ab",
            "69f6eb1cb0434128961b5d83529813c5",
            "6723d50588a248d0ad7bb118de8c3fd5",
            "86d71b8233c14252a897ffa29ea6d9df",
            "d01c708e22ab423896271fa79860e7c3",
            "0e8da5995754472fac5fba1f8b30d107",
            "3dbee77f299f4e14a1698b60d609b8a1",
            "8c4c9025aaae44148591ae6f8bb37347",
            "29e2f2f0914e4dea8117844675b42be5",
            "0cfc8fa73f164b4fa5ddcbc3f115ef9b",
            "4559bd35b33f4804b968debaaf316463",
            "e403cc7718bf48f1b95150482e083f02",
            "f6248a9db7f2466a9ab3a4fbd214f265",
            "475e5353d31147d3ab156c0e7835684c",
            "c3f65d683c6e4fe18e31ecc305f8d455",
            "9b50abad66b44022aa389bc3f312db6b",
            "762b2941ff3e47d89b6e6ce4350bc058"
          ]
        }
      },
      "source": [
        "trainer.fit(model)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:lightning:\n",
            "    | Name                                                                  | Type                       | Params\n",
            "-----------------------------------------------------------------------------------------------------------------\n",
            "0   | model                                                                 | T5ForConditionalGeneration | 222 M \n",
            "1   | model.shared                                                          | Embedding                  | 24 M  \n",
            "2   | model.encoder                                                         | T5Stack                    | 109 M \n",
            "3   | model.encoder.block                                                   | ModuleList                 | 84 M  \n",
            "4   | model.encoder.block.0                                                 | T5Block                    | 7 M   \n",
            "5   | model.encoder.block.0.layer                                           | ModuleList                 | 7 M   \n",
            "6   | model.encoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "7   | model.encoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "8   | model.encoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "9   | model.encoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "10  | model.encoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "11  | model.encoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "12  | model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \n",
            "13  | model.encoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "14  | model.encoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \n",
            "15  | model.encoder.block.0.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "16  | model.encoder.block.0.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "17  | model.encoder.block.0.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "18  | model.encoder.block.0.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "19  | model.encoder.block.0.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "20  | model.encoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "21  | model.encoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \n",
            "22  | model.encoder.block.1                                                 | T5Block                    | 7 M   \n",
            "23  | model.encoder.block.1.layer                                           | ModuleList                 | 7 M   \n",
            "24  | model.encoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "25  | model.encoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "26  | model.encoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "27  | model.encoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "28  | model.encoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "29  | model.encoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "30  | model.encoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "31  | model.encoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \n",
            "32  | model.encoder.block.1.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "33  | model.encoder.block.1.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "34  | model.encoder.block.1.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "35  | model.encoder.block.1.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "36  | model.encoder.block.1.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "37  | model.encoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "38  | model.encoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \n",
            "39  | model.encoder.block.2                                                 | T5Block                    | 7 M   \n",
            "40  | model.encoder.block.2.layer                                           | ModuleList                 | 7 M   \n",
            "41  | model.encoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "42  | model.encoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "43  | model.encoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "44  | model.encoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "45  | model.encoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "46  | model.encoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "47  | model.encoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "48  | model.encoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \n",
            "49  | model.encoder.block.2.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "50  | model.encoder.block.2.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "51  | model.encoder.block.2.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "52  | model.encoder.block.2.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "53  | model.encoder.block.2.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "54  | model.encoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "55  | model.encoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \n",
            "56  | model.encoder.block.3                                                 | T5Block                    | 7 M   \n",
            "57  | model.encoder.block.3.layer                                           | ModuleList                 | 7 M   \n",
            "58  | model.encoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "59  | model.encoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "60  | model.encoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "61  | model.encoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "62  | model.encoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "63  | model.encoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "64  | model.encoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "65  | model.encoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \n",
            "66  | model.encoder.block.3.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "67  | model.encoder.block.3.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "68  | model.encoder.block.3.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "69  | model.encoder.block.3.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "70  | model.encoder.block.3.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "71  | model.encoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "72  | model.encoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \n",
            "73  | model.encoder.block.4                                                 | T5Block                    | 7 M   \n",
            "74  | model.encoder.block.4.layer                                           | ModuleList                 | 7 M   \n",
            "75  | model.encoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "76  | model.encoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "77  | model.encoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "78  | model.encoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "79  | model.encoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "80  | model.encoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "81  | model.encoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "82  | model.encoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \n",
            "83  | model.encoder.block.4.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "84  | model.encoder.block.4.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "85  | model.encoder.block.4.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "86  | model.encoder.block.4.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "87  | model.encoder.block.4.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "88  | model.encoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "89  | model.encoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \n",
            "90  | model.encoder.block.5                                                 | T5Block                    | 7 M   \n",
            "91  | model.encoder.block.5.layer                                           | ModuleList                 | 7 M   \n",
            "92  | model.encoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "93  | model.encoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "94  | model.encoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "95  | model.encoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "96  | model.encoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "97  | model.encoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "98  | model.encoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "99  | model.encoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \n",
            "100 | model.encoder.block.5.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "101 | model.encoder.block.5.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "102 | model.encoder.block.5.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "103 | model.encoder.block.5.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "104 | model.encoder.block.5.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "105 | model.encoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "106 | model.encoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \n",
            "107 | model.encoder.block.6                                                 | T5Block                    | 7 M   \n",
            "108 | model.encoder.block.6.layer                                           | ModuleList                 | 7 M   \n",
            "109 | model.encoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "110 | model.encoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "111 | model.encoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "112 | model.encoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "113 | model.encoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "114 | model.encoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "115 | model.encoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "116 | model.encoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \n",
            "117 | model.encoder.block.6.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "118 | model.encoder.block.6.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "119 | model.encoder.block.6.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "120 | model.encoder.block.6.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "121 | model.encoder.block.6.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "122 | model.encoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "123 | model.encoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \n",
            "124 | model.encoder.block.7                                                 | T5Block                    | 7 M   \n",
            "125 | model.encoder.block.7.layer                                           | ModuleList                 | 7 M   \n",
            "126 | model.encoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "127 | model.encoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "128 | model.encoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "129 | model.encoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "130 | model.encoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "131 | model.encoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "132 | model.encoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "133 | model.encoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \n",
            "134 | model.encoder.block.7.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "135 | model.encoder.block.7.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "136 | model.encoder.block.7.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "137 | model.encoder.block.7.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "138 | model.encoder.block.7.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "139 | model.encoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "140 | model.encoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \n",
            "141 | model.encoder.block.8                                                 | T5Block                    | 7 M   \n",
            "142 | model.encoder.block.8.layer                                           | ModuleList                 | 7 M   \n",
            "143 | model.encoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "144 | model.encoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "145 | model.encoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "146 | model.encoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "147 | model.encoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "148 | model.encoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "149 | model.encoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "150 | model.encoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \n",
            "151 | model.encoder.block.8.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "152 | model.encoder.block.8.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "153 | model.encoder.block.8.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "154 | model.encoder.block.8.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "155 | model.encoder.block.8.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "156 | model.encoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "157 | model.encoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \n",
            "158 | model.encoder.block.9                                                 | T5Block                    | 7 M   \n",
            "159 | model.encoder.block.9.layer                                           | ModuleList                 | 7 M   \n",
            "160 | model.encoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "161 | model.encoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "162 | model.encoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "163 | model.encoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "164 | model.encoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "165 | model.encoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "166 | model.encoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "167 | model.encoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \n",
            "168 | model.encoder.block.9.layer.1                                         | T5LayerFF                  | 4 M   \n",
            "169 | model.encoder.block.9.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "170 | model.encoder.block.9.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "171 | model.encoder.block.9.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "172 | model.encoder.block.9.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "173 | model.encoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "174 | model.encoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \n",
            "175 | model.encoder.block.10                                                | T5Block                    | 7 M   \n",
            "176 | model.encoder.block.10.layer                                          | ModuleList                 | 7 M   \n",
            "177 | model.encoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "178 | model.encoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "179 | model.encoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "180 | model.encoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "181 | model.encoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "182 | model.encoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "183 | model.encoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "184 | model.encoder.block.10.layer.0.dropout                                | Dropout                    | 0     \n",
            "185 | model.encoder.block.10.layer.1                                        | T5LayerFF                  | 4 M   \n",
            "186 | model.encoder.block.10.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "187 | model.encoder.block.10.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "188 | model.encoder.block.10.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "189 | model.encoder.block.10.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "190 | model.encoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "191 | model.encoder.block.10.layer.1.dropout                                | Dropout                    | 0     \n",
            "192 | model.encoder.block.11                                                | T5Block                    | 7 M   \n",
            "193 | model.encoder.block.11.layer                                          | ModuleList                 | 7 M   \n",
            "194 | model.encoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "195 | model.encoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "196 | model.encoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "197 | model.encoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "198 | model.encoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "199 | model.encoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "200 | model.encoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "201 | model.encoder.block.11.layer.0.dropout                                | Dropout                    | 0     \n",
            "202 | model.encoder.block.11.layer.1                                        | T5LayerFF                  | 4 M   \n",
            "203 | model.encoder.block.11.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "204 | model.encoder.block.11.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "205 | model.encoder.block.11.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "206 | model.encoder.block.11.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "207 | model.encoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "208 | model.encoder.block.11.layer.1.dropout                                | Dropout                    | 0     \n",
            "209 | model.encoder.final_layer_norm                                        | T5LayerNorm                | 768   \n",
            "210 | model.encoder.dropout                                                 | Dropout                    | 0     \n",
            "211 | model.decoder                                                         | T5Stack                    | 137 M \n",
            "212 | model.decoder.block                                                   | ModuleList                 | 113 M \n",
            "213 | model.decoder.block.0                                                 | T5Block                    | 9 M   \n",
            "214 | model.decoder.block.0.layer                                           | ModuleList                 | 9 M   \n",
            "215 | model.decoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "216 | model.decoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "217 | model.decoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "218 | model.decoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "219 | model.decoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "220 | model.decoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "221 | model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \n",
            "222 | model.decoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "223 | model.decoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \n",
            "224 | model.decoder.block.0.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "225 | model.decoder.block.0.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "226 | model.decoder.block.0.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "227 | model.decoder.block.0.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "228 | model.decoder.block.0.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "229 | model.decoder.block.0.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "230 | model.decoder.block.0.layer.1.EncDecAttention.relative_attention_bias | Embedding                  | 384   \n",
            "231 | model.decoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "232 | model.decoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \n",
            "233 | model.decoder.block.0.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "234 | model.decoder.block.0.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "235 | model.decoder.block.0.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "236 | model.decoder.block.0.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "237 | model.decoder.block.0.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "238 | model.decoder.block.0.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "239 | model.decoder.block.0.layer.2.dropout                                 | Dropout                    | 0     \n",
            "240 | model.decoder.block.1                                                 | T5Block                    | 9 M   \n",
            "241 | model.decoder.block.1.layer                                           | ModuleList                 | 9 M   \n",
            "242 | model.decoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "243 | model.decoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "244 | model.decoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "245 | model.decoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "246 | model.decoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "247 | model.decoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "248 | model.decoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "249 | model.decoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \n",
            "250 | model.decoder.block.1.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "251 | model.decoder.block.1.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "252 | model.decoder.block.1.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "253 | model.decoder.block.1.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "254 | model.decoder.block.1.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "255 | model.decoder.block.1.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "256 | model.decoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "257 | model.decoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \n",
            "258 | model.decoder.block.1.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "259 | model.decoder.block.1.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "260 | model.decoder.block.1.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "261 | model.decoder.block.1.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "262 | model.decoder.block.1.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "263 | model.decoder.block.1.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "264 | model.decoder.block.1.layer.2.dropout                                 | Dropout                    | 0     \n",
            "265 | model.decoder.block.2                                                 | T5Block                    | 9 M   \n",
            "266 | model.decoder.block.2.layer                                           | ModuleList                 | 9 M   \n",
            "267 | model.decoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "268 | model.decoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "269 | model.decoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "270 | model.decoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "271 | model.decoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "272 | model.decoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "273 | model.decoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "274 | model.decoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \n",
            "275 | model.decoder.block.2.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "276 | model.decoder.block.2.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "277 | model.decoder.block.2.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "278 | model.decoder.block.2.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "279 | model.decoder.block.2.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "280 | model.decoder.block.2.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "281 | model.decoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "282 | model.decoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \n",
            "283 | model.decoder.block.2.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "284 | model.decoder.block.2.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "285 | model.decoder.block.2.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "286 | model.decoder.block.2.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "287 | model.decoder.block.2.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "288 | model.decoder.block.2.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "289 | model.decoder.block.2.layer.2.dropout                                 | Dropout                    | 0     \n",
            "290 | model.decoder.block.3                                                 | T5Block                    | 9 M   \n",
            "291 | model.decoder.block.3.layer                                           | ModuleList                 | 9 M   \n",
            "292 | model.decoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "293 | model.decoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "294 | model.decoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "295 | model.decoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "296 | model.decoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "297 | model.decoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "298 | model.decoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "299 | model.decoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \n",
            "300 | model.decoder.block.3.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "301 | model.decoder.block.3.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "302 | model.decoder.block.3.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "303 | model.decoder.block.3.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "304 | model.decoder.block.3.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "305 | model.decoder.block.3.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "306 | model.decoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "307 | model.decoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \n",
            "308 | model.decoder.block.3.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "309 | model.decoder.block.3.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "310 | model.decoder.block.3.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "311 | model.decoder.block.3.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "312 | model.decoder.block.3.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "313 | model.decoder.block.3.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "314 | model.decoder.block.3.layer.2.dropout                                 | Dropout                    | 0     \n",
            "315 | model.decoder.block.4                                                 | T5Block                    | 9 M   \n",
            "316 | model.decoder.block.4.layer                                           | ModuleList                 | 9 M   \n",
            "317 | model.decoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "318 | model.decoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "319 | model.decoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "320 | model.decoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "321 | model.decoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "322 | model.decoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "323 | model.decoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "324 | model.decoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \n",
            "325 | model.decoder.block.4.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "326 | model.decoder.block.4.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "327 | model.decoder.block.4.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "328 | model.decoder.block.4.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "329 | model.decoder.block.4.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "330 | model.decoder.block.4.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "331 | model.decoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "332 | model.decoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \n",
            "333 | model.decoder.block.4.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "334 | model.decoder.block.4.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "335 | model.decoder.block.4.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "336 | model.decoder.block.4.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "337 | model.decoder.block.4.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "338 | model.decoder.block.4.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "339 | model.decoder.block.4.layer.2.dropout                                 | Dropout                    | 0     \n",
            "340 | model.decoder.block.5                                                 | T5Block                    | 9 M   \n",
            "341 | model.decoder.block.5.layer                                           | ModuleList                 | 9 M   \n",
            "342 | model.decoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "343 | model.decoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "344 | model.decoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "345 | model.decoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "346 | model.decoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "347 | model.decoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "348 | model.decoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "349 | model.decoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \n",
            "350 | model.decoder.block.5.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "351 | model.decoder.block.5.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "352 | model.decoder.block.5.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "353 | model.decoder.block.5.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "354 | model.decoder.block.5.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "355 | model.decoder.block.5.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "356 | model.decoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "357 | model.decoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \n",
            "358 | model.decoder.block.5.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "359 | model.decoder.block.5.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "360 | model.decoder.block.5.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "361 | model.decoder.block.5.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "362 | model.decoder.block.5.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "363 | model.decoder.block.5.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "364 | model.decoder.block.5.layer.2.dropout                                 | Dropout                    | 0     \n",
            "365 | model.decoder.block.6                                                 | T5Block                    | 9 M   \n",
            "366 | model.decoder.block.6.layer                                           | ModuleList                 | 9 M   \n",
            "367 | model.decoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "368 | model.decoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "369 | model.decoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "370 | model.decoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "371 | model.decoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "372 | model.decoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "373 | model.decoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "374 | model.decoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \n",
            "375 | model.decoder.block.6.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "376 | model.decoder.block.6.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "377 | model.decoder.block.6.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "378 | model.decoder.block.6.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "379 | model.decoder.block.6.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "380 | model.decoder.block.6.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "381 | model.decoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "382 | model.decoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \n",
            "383 | model.decoder.block.6.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "384 | model.decoder.block.6.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "385 | model.decoder.block.6.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "386 | model.decoder.block.6.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "387 | model.decoder.block.6.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "388 | model.decoder.block.6.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "389 | model.decoder.block.6.layer.2.dropout                                 | Dropout                    | 0     \n",
            "390 | model.decoder.block.7                                                 | T5Block                    | 9 M   \n",
            "391 | model.decoder.block.7.layer                                           | ModuleList                 | 9 M   \n",
            "392 | model.decoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "393 | model.decoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "394 | model.decoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "395 | model.decoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "396 | model.decoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "397 | model.decoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "398 | model.decoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "399 | model.decoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \n",
            "400 | model.decoder.block.7.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "401 | model.decoder.block.7.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "402 | model.decoder.block.7.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "403 | model.decoder.block.7.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "404 | model.decoder.block.7.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "405 | model.decoder.block.7.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "406 | model.decoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "407 | model.decoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \n",
            "408 | model.decoder.block.7.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "409 | model.decoder.block.7.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "410 | model.decoder.block.7.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "411 | model.decoder.block.7.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "412 | model.decoder.block.7.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "413 | model.decoder.block.7.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "414 | model.decoder.block.7.layer.2.dropout                                 | Dropout                    | 0     \n",
            "415 | model.decoder.block.8                                                 | T5Block                    | 9 M   \n",
            "416 | model.decoder.block.8.layer                                           | ModuleList                 | 9 M   \n",
            "417 | model.decoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "418 | model.decoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "419 | model.decoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "420 | model.decoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "421 | model.decoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "422 | model.decoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "423 | model.decoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "424 | model.decoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \n",
            "425 | model.decoder.block.8.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "426 | model.decoder.block.8.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "427 | model.decoder.block.8.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "428 | model.decoder.block.8.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "429 | model.decoder.block.8.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "430 | model.decoder.block.8.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "431 | model.decoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "432 | model.decoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \n",
            "433 | model.decoder.block.8.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "434 | model.decoder.block.8.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "435 | model.decoder.block.8.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "436 | model.decoder.block.8.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "437 | model.decoder.block.8.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "438 | model.decoder.block.8.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "439 | model.decoder.block.8.layer.2.dropout                                 | Dropout                    | 0     \n",
            "440 | model.decoder.block.9                                                 | T5Block                    | 9 M   \n",
            "441 | model.decoder.block.9.layer                                           | ModuleList                 | 9 M   \n",
            "442 | model.decoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \n",
            "443 | model.decoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \n",
            "444 | model.decoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \n",
            "445 | model.decoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \n",
            "446 | model.decoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \n",
            "447 | model.decoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \n",
            "448 | model.decoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \n",
            "449 | model.decoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \n",
            "450 | model.decoder.block.9.layer.1                                         | T5LayerCrossAttention      | 2 M   \n",
            "451 | model.decoder.block.9.layer.1.EncDecAttention                         | T5Attention                | 2 M   \n",
            "452 | model.decoder.block.9.layer.1.EncDecAttention.q                       | Linear                     | 589 K \n",
            "453 | model.decoder.block.9.layer.1.EncDecAttention.k                       | Linear                     | 589 K \n",
            "454 | model.decoder.block.9.layer.1.EncDecAttention.v                       | Linear                     | 589 K \n",
            "455 | model.decoder.block.9.layer.1.EncDecAttention.o                       | Linear                     | 589 K \n",
            "456 | model.decoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \n",
            "457 | model.decoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \n",
            "458 | model.decoder.block.9.layer.2                                         | T5LayerFF                  | 4 M   \n",
            "459 | model.decoder.block.9.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \n",
            "460 | model.decoder.block.9.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \n",
            "461 | model.decoder.block.9.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \n",
            "462 | model.decoder.block.9.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \n",
            "463 | model.decoder.block.9.layer.2.layer_norm                              | T5LayerNorm                | 768   \n",
            "464 | model.decoder.block.9.layer.2.dropout                                 | Dropout                    | 0     \n",
            "465 | model.decoder.block.10                                                | T5Block                    | 9 M   \n",
            "466 | model.decoder.block.10.layer                                          | ModuleList                 | 9 M   \n",
            "467 | model.decoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "468 | model.decoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "469 | model.decoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "470 | model.decoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "471 | model.decoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "472 | model.decoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "473 | model.decoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "474 | model.decoder.block.10.layer.0.dropout                                | Dropout                    | 0     \n",
            "475 | model.decoder.block.10.layer.1                                        | T5LayerCrossAttention      | 2 M   \n",
            "476 | model.decoder.block.10.layer.1.EncDecAttention                        | T5Attention                | 2 M   \n",
            "477 | model.decoder.block.10.layer.1.EncDecAttention.q                      | Linear                     | 589 K \n",
            "478 | model.decoder.block.10.layer.1.EncDecAttention.k                      | Linear                     | 589 K \n",
            "479 | model.decoder.block.10.layer.1.EncDecAttention.v                      | Linear                     | 589 K \n",
            "480 | model.decoder.block.10.layer.1.EncDecAttention.o                      | Linear                     | 589 K \n",
            "481 | model.decoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "482 | model.decoder.block.10.layer.1.dropout                                | Dropout                    | 0     \n",
            "483 | model.decoder.block.10.layer.2                                        | T5LayerFF                  | 4 M   \n",
            "484 | model.decoder.block.10.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "485 | model.decoder.block.10.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "486 | model.decoder.block.10.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "487 | model.decoder.block.10.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "488 | model.decoder.block.10.layer.2.layer_norm                             | T5LayerNorm                | 768   \n",
            "489 | model.decoder.block.10.layer.2.dropout                                | Dropout                    | 0     \n",
            "490 | model.decoder.block.11                                                | T5Block                    | 9 M   \n",
            "491 | model.decoder.block.11.layer                                          | ModuleList                 | 9 M   \n",
            "492 | model.decoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \n",
            "493 | model.decoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \n",
            "494 | model.decoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \n",
            "495 | model.decoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \n",
            "496 | model.decoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \n",
            "497 | model.decoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \n",
            "498 | model.decoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \n",
            "499 | model.decoder.block.11.layer.0.dropout                                | Dropout                    | 0     \n",
            "500 | model.decoder.block.11.layer.1                                        | T5LayerCrossAttention      | 2 M   \n",
            "501 | model.decoder.block.11.layer.1.EncDecAttention                        | T5Attention                | 2 M   \n",
            "502 | model.decoder.block.11.layer.1.EncDecAttention.q                      | Linear                     | 589 K \n",
            "503 | model.decoder.block.11.layer.1.EncDecAttention.k                      | Linear                     | 589 K \n",
            "504 | model.decoder.block.11.layer.1.EncDecAttention.v                      | Linear                     | 589 K \n",
            "505 | model.decoder.block.11.layer.1.EncDecAttention.o                      | Linear                     | 589 K \n",
            "506 | model.decoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \n",
            "507 | model.decoder.block.11.layer.1.dropout                                | Dropout                    | 0     \n",
            "508 | model.decoder.block.11.layer.2                                        | T5LayerFF                  | 4 M   \n",
            "509 | model.decoder.block.11.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \n",
            "510 | model.decoder.block.11.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \n",
            "511 | model.decoder.block.11.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \n",
            "512 | model.decoder.block.11.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \n",
            "513 | model.decoder.block.11.layer.2.layer_norm                             | T5LayerNorm                | 768   \n",
            "514 | model.decoder.block.11.layer.2.dropout                                | Dropout                    | 0     \n",
            "515 | model.decoder.final_layer_norm                                        | T5LayerNorm                | 768   \n",
            "516 | model.decoder.dropout                                                 | Dropout                    | 0     \n",
            "517 | model.lm_head                                                         | Linear                     | 24 M  \n",
            "INFO:__main__:LOOKING AT swag_data dev\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "8e79d03deee94b299431330441bd64c8",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:LOOKING AT swag_data train\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\r"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:LOOKING AT swag_data dev\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "68705cee3df5458fb5145046337d925c",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "/pytorch/torch/csrc/utils/python_arg_parser.cpp:756: UserWarning: This overload of add_ is deprecated:\n",
            "\tadd_(Number alpha, Tensor other)\n",
            "Consider using one of the following signatures instead:\n",
            "\tadd_(Tensor other, *, Number alpha)\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "69f6eb1cb0434128961b5d83529813c5",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:***** Validation results *****\n",
            "INFO:__main__:avg_val_loss = tensor(0.3535, device='cuda:0')\n",
            "\n",
            "INFO:__main__:loss = tensor(0.3080, device='cuda:0')\n",
            "\n",
            "INFO:__main__:train_loss = tensor(0.3080, device='cuda:0')\n",
            "\n",
            "INFO:__main__:val_loss = tensor(0.3535, device='cuda:0')\n",
            "\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "0cfc8fa73f164b4fa5ddcbc3f115ef9b",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:***** Validation results *****\n",
            "INFO:__main__:avg_train_loss = tensor(0.5107, device='cuda:0')\n",
            "\n",
            "INFO:__main__:avg_val_loss = tensor(0.3268, device='cuda:0')\n",
            "\n",
            "INFO:__main__:epoch = 0\n",
            "\n",
            "INFO:__main__:loss = tensor(0.5484, device='cuda:0')\n",
            "\n",
            "INFO:__main__:train_loss = tensor(0.5484, device='cuda:0')\n",
            "\n",
            "INFO:__main__:val_loss = tensor(0.3268, device='cuda:0')\n",
            "\n",
            "INFO:lightning:Detected KeyboardInterrupt, attempting graceful shutdown...\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "1"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 22
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "o1ZB_6SK7V-3",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "AgNV3TMzqSvj",
        "colab_type": "text"
      },
      "source": [
        "### Eval"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "gFFOwfXyqc4_",
        "colab": {}
      },
      "source": [
        "import textwrap\n",
        "from tqdm.auto import tqdm\n",
        "from sklearn import metrics"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "rsYCq3Lwqc5Y",
        "outputId": "51f7bd88-2441-42be-e8f3-adc0337a164c",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "dataset =  SwagDataset(tokenizer, data_dir='swag_data', type_path='val')\n",
        "loader = DataLoader(dataset, batch_size=32, num_workers=4)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:__main__:LOOKING AT swag_data dev\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "outputId": "81e7d67d-1d15-4dea-a552-695cfe8ef105",
        "id": "KHwMBQNjqc5h",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 66,
          "referenced_widgets": [
            "1597779d89464892885045be715890a8",
            "8a42468ed6b945e8bfce1803f3ea4452",
            "f87eae824cf1492b9555b78648a9f261",
            "6cd0d574b5fd43588b8d492674125218",
            "17b25142ac744ba882e2bbd1f42c1db2",
            "09185d325ef84c1fad7b07fbd9eeed31",
            "ba31765789dc46229493674dab21921d",
            "a9dd88fb73374e108482b80993b998eb"
          ]
        }
      },
      "source": [
        "model.model.eval()\n",
        "outputs = []\n",
        "targets = []\n",
        "for batch in tqdm(loader):\n",
        "  outs = model.model.generate(input_ids=batch['source_ids'].cuda(), \n",
        "                              attention_mask=batch['source_mask'].cuda(), \n",
        "                              max_length=2)\n",
        "\n",
        "  dec = [tokenizer.decode(ids) for ids in outs]\n",
        "  target = [tokenizer.decode(ids) for ids in batch[\"target_ids\"]]\n",
        "  \n",
        "  outputs.extend(dec)\n",
        "  targets.extend(target)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "1597779d89464892885045be715890a8",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, max=626.0), HTML(value='')))"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZbTValmYq15r",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "for i, out in enumerate(outputs):\n",
        "  if out not in \"1234\":\n",
        "    print(i, 'detected invalid prediction')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jN35n2pas-pF",
        "colab_type": "code",
        "outputId": "be8a3507-8e66-479d-c41c-dd9cb0603742",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "metrics.accuracy_score(targets, outputs)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.7397280815755274"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 28
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "t_WaMutznvGb",
        "colab_type": "text"
      },
      "source": [
        "This is great! We have achieved almost 74% accuracy with this simple formulation. This is notable because, with BERT-like models, making a prediction on a single example requires 4 forward passes, one for each possible ending; the logits from all 4 passes are then concatenated and passed through a final softmax layer to produce 4 probabilities. This approach needs only a single pass per example."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rFgOHlW_tHPd",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}